[Pir]Fix Value eq error when using set #58896

Merged: 53 commits, Dec 18, 2023
Changes from 7 commits
Commits (53)
6539a97
[Pir]Fix Value eq error when using set
0x45f Nov 10, 2023
3899a7d
Fix iter
0x45f Nov 10, 2023
7f6c298
Refine code
0x45f Nov 10, 2023
0793a38
add ValueDict
zrr1999 Nov 10, 2023
f8abfa3
fix
zrr1999 Nov 13, 2023
6e5947b
update valueset
zrr1999 Nov 13, 2023
09f5eac
update dict
zrr1999 Nov 13, 2023
ad7461d
fix bug
zrr1999 Nov 14, 2023
531621c
improve
zrr1999 Nov 14, 2023
6b252fc
fix
zrr1999 Nov 14, 2023
7eb2685
fix contains
zrr1999 Nov 14, 2023
d7792bd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Nov 15, 2023
bc174c9
Fix set(Value) or dict[Value] = xx code
0x45f Nov 15, 2023
8a6f0a7
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Nov 16, 2023
d7b38e8
Fix ut
0x45f Nov 16, 2023
963d0f8
Fix double grad ut
0x45f Nov 16, 2023
d299a50
Fix ut
0x45f Nov 17, 2023
8bae46b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Nov 24, 2023
ba7a2cd
Forbid opresult hash
0x45f Nov 24, 2023
4a50818
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Nov 26, 2023
ace1ae1
Fix set and dict
0x45f Nov 26, 2023
cd3e2ea
Fix dy2st
0x45f Nov 26, 2023
470a2aa
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 7, 2023
faefbe8
Refine code
0x45f Dec 7, 2023
526eecd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 7, 2023
78cbe1b
Forbid eq
0x45f Dec 7, 2023
9be9129
Fix map and value_eq
0x45f Dec 7, 2023
176e09b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 8, 2023
ea46d6b
Fix None hash
0x45f Dec 8, 2023
dcaf6c4
Fix decomp
0x45f Dec 8, 2023
1df44e2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 11, 2023
55cbfcc
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 12, 2023
b8b6416
Refine value set/dict
0x45f Dec 12, 2023
16bc3c6
Add hash
0x45f Dec 12, 2023
3b0a7cd
fix clone program, return an associated array.
2742195759 Dec 12, 2023
1a7767d
Merge commit 'refs/pull/58896/head' of https://github.com/PaddlePaddl…
2742195759 Dec 12, 2023
7ab860c
Fix iter
0x45f Dec 12, 2023
31686de
Fix code
0x45f Dec 13, 2023
294abe4
Fix backward
0x45f Dec 13, 2023
b168af9
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 13, 2023
96d8cf3
Fix decomp and add copy()
0x45f Dec 13, 2023
e3637da
Format code
0x45f Dec 13, 2023
ef65985
Fix ut
0x45f Dec 14, 2023
7e6422c
Fix layer params set
0x45f Dec 14, 2023
a43cd6e
Fix prim op test
0x45f Dec 14, 2023
cf1ce82
Fix named_parameters
0x45f Dec 14, 2023
88b3e9f
Support value __eq__
0x45f Dec 15, 2023
4ec0f71
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 15, 2023
2611c9f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 15, 2023
7b8cf7e
Fix test cond ==
0x45f Dec 15, 2023
6af0553
Add ut for value set/dict
0x45f Dec 18, 2023
b8953f2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 18, 2023
4464870
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
0x45f Dec 18, 2023
8 changes: 4 additions & 4 deletions paddle/fluid/pybind/pir.cc
@@ -476,8 +476,8 @@ void BindValue(py::module *m) {
[](Value &self, Value &op_value) {
self.ReplaceAllUsesWith(op_value);
})
.def("__eq__", &Value::operator==)
.def("__eq__",
.def("is_same", &Value::operator==)
.def("is_same",
[](Value &self, OpResult &other) {
return self.impl() == other.Value::impl();
})
@@ -664,8 +664,8 @@ void BindOpResult(py::module *m) {
OVERRIDE_COMPARE_OP_FOR_EACH(__gt__, greater_than);
OVERRIDE_COMPARE_OP_FOR_EACH(__ge__, greater_equal);

-  op_result.def("__eq__", &OpResult::operator==)
-      .def("__eq__",
+  op_result.def("is_same", &OpResult::operator==)
+      .def("is_same",
[](OpResult &self, Value &other) {
return self.Value::impl() == other.impl();
})
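The rename matters because Python containers resolve membership and lookup through __eq__ and __hash__. A minimal sketch of the failure mode, assuming a hypothetical FakeValue stand-in (not the real pir.Value API) whose == builds a compare op instead of returning a bool:

class FakeValue:
    """Hypothetical stand-in for a pir value with tensor-style `==`."""

    def __init__(self, impl):
        self._impl = impl

    def __hash__(self):
        return hash(self._impl)

    def __eq__(self, other):
        # Tensor semantics: build a new "equal" value, not a bool.
        return FakeValue(("equal", self._impl, other._impl))

    def is_same(self, other):
        # Identity semantics, analogous to comparing Value::impl().
        return self._impl == other._impl

values = [FakeValue(1)]
print(FakeValue(1).is_same(values[0]))  # True, and it is a real bool
try:
    print(None in values)
except AttributeError as exc:
    print("membership check crashed:", exc)  # __eq__ assumed other._impl

With == reserved for op building, container lookups either crash or silently use an op object as a truth value, which is why the identity test moves to is_same and the backward code below switches to wrapper containers built on it.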
90 changes: 84 additions & 6 deletions python/paddle/autograd/backward_utils.py
@@ -11,8 +11,86 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import collections
from typing import Any


class ValueInDict:
def __init__(self, value) -> None:
self.value = value

def __hash__(self) -> int:
return hash(self.value)

def __eq__(self, other) -> bool:
if isinstance(other, ValueInDict):
other = other.value
return self.value.is_same(other)


class ValueDict:
def __init__(
self,
iter: dict[ValueInDict, Any] | None = None,
*,
default_factory=None,
):
self._items: dict[ValueInDict, Any] = {}
self._default_factory = default_factory
if iter is not None:
for key, val in iter.items():
self[key] = val

    def update(self, other_dict):
        # Plain dict iteration yields keys only; items() gives key/value pairs.
        for key, val in other_dict.items():
            self[key] = val

def keys(self):
return self._items.keys()

def values(self):
return self._items.values()

def items(self):
return self._items.items()

    def __setitem__(self, key, val: Any):
        # Normalize keys to ValueInDict so every lookup compares via is_same().
        if not isinstance(key, ValueInDict):
            key = ValueInDict(key)
        self._items[key] = val

def __getitem__(self, other_key):
if not self.__contains__(other_key):
if self._default_factory is not None:
self[other_key] = self._default_factory()
else:
self[other_key] = None
return self._items[other_key]

def __and__(self, other_dict: ValueDict):
ret = ValueDict()
for key, val in self._items.items():
if key in other_dict:
ret[key] = val
return ret

def __or__(self, other_dict: ValueDict):
return ValueDict(self._items | other_dict._items)

def __bool__(self):
return bool(self._items)

def __len__(self):
return len(self._items)

    def __iter__(self):
        # keys() returns a view, which is iterable but not itself an iterator.
        return iter(self._items)

def __contains__(self, other_key):
for key in self._items.keys():
if hash(key) == hash(other_key) and key == other_key:
return True
return False

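A short usage sketch, assuming keys behave like pir values (Val below is a hypothetical stand-in that provides only __hash__ and is_same):

class Val:
    def __init__(self, impl):
        self._impl = impl

    def __hash__(self):
        return hash(self._impl)

    def is_same(self, other):
        return self._impl == other._impl

grads = ValueDict(default_factory=list)
v1, v2 = Val("x"), Val("x")  # two distinct wrappers, same underlying value
grads[v1].append("dx")
print(v2 in grads)  # True: membership is resolved through is_same()
print(grads[v2])    # ['dx']: an equivalent key reaches the same entry

Because keys are normalized to ValueInDict, any two Python objects wrapping the same underlying value share one entry, which is what the gradient maps in State need.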

class State:
@@ -25,20 +103,20 @@ class State:
def __init__(self, program):
self.program = program
# opresult -> list(list(opresult))
-        self.value_to_valuegrad = collections.defaultdict(list)
-        self.value_to_sumvaluegrad = collections.defaultdict(list)
+        self.value_to_valuegrad = ValueDict(default_factory=list)
+        self.value_to_sumvaluegrad = ValueDict(default_factory=list)
# operation -> list(operation)
self.op_to_opgrad = collections.defaultdict(list)

# opresult -> list(opresult)
-        self.valuegrad_to_value = collections.defaultdict(list)
-        self.sumvaluegrad_to_value = collections.defaultdict(list)
+        self.valuegrad_to_value = ValueDict(default_factory=list)
+        self.sumvaluegrad_to_value = ValueDict(default_factory=list)
# operation -> list(operation)
self.opgrad_to_op = collections.defaultdict(list)

def turn_map(self) -> None:
-        self.valuegrad_to_value = collections.defaultdict(list)
-        self.sumvaluegrad_to_value = collections.defaultdict(list)
+        self.valuegrad_to_value = ValueDict(default_factory=list)
+        self.sumvaluegrad_to_value = ValueDict(default_factory=list)
self.opgrad_to_op = collections.defaultdict(list)

for k, v in self.value_to_valuegrad.items():
102 changes: 81 additions & 21 deletions python/paddle/autograd/ir_backward.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import collections
import logging
from collections.abc import Sequence
@@ -27,6 +29,63 @@
__all__ = ['grad', 'calc_gradient', 'calc_gradient_helper']


class ValueInSet:
def __init__(self, value) -> None:
self.value = value

def __hash__(self) -> int:
return hash(self.value)

def __eq__(self, other) -> bool:
if isinstance(other, ValueInSet):
other = other.value
return self.value.is_same(other)


class ValueSet:
def __init__(
self, iter: Sequence[ValueInSet] | set[ValueInSet] | None = None
):
self._values: set[ValueInSet] = set()
if iter is not None:
for val in iter:
self.add(val)

    def add(self, other_val):
        # Avoid double-wrapping values that already come from a ValueSet.
        if not isinstance(other_val, ValueInSet):
            other_val = ValueInSet(other_val)
        if not self.__contains__(other_val):
            self._values.add(other_val)

    def update(self, other_set: set):
        # add() wraps raw values itself; wrapping again here would break is_same().
        for val in other_set:
            self.add(val)

def __and__(self, other_set: ValueSet):
ret = ValueSet()
for val in self._values:
if val in other_set:
ret.add(val)
return ret

def __or__(self, other_set: ValueSet):
return ValueSet(self._values | other_set._values)

def __bool__(self):
return bool(self._values)

def __len__(self):
return len(self._values)

def __iter__(self):
return iter(self._values)

def __contains__(self, other_val):
for value in self._values:
if hash(value) == hash(other_val) and value == other_val:
return True
return False

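The set wrapper composes the same way; a minimal sketch, reusing the same kind of hypothetical Val stand-in (only __hash__ and is_same are assumed):

class Val:
    def __init__(self, impl):
        self._impl = impl

    def __hash__(self):
        return hash(self._impl)

    def is_same(self, other):
        return self._impl == other._impl

s = ValueSet([Val("a"), Val("b")])
t = ValueSet([Val("b"), Val("c")])
print(Val("a") in s)  # True: membership goes through is_same()
print(len(s & t))     # 1: intersection matches by underlying value
print(len(s | t))     # 3: union deduplicates equivalent wrappers

This keeps identity-based set algebra available to the pruning and backward passes while leaving == on values free for elementwise semantics.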

def check_type(input, input_name, expected_type, op_name, extra_message=''):
if not isinstance(input, expected_type):
raise TypeError(
@@ -124,7 +183,7 @@ def prepare_grad_outputs(grad_outputs, outputs, state)
complete_outputs = outputs
complete_gradoutputs = grad_outputs

-    visited_output = set()
+    visited_output = ValueSet()
for output in outputs:
if output in visited_output:
continue
@@ -157,7 +216,7 @@

def some_in_set(value_list, value_set):
def operand2value(values):
-        value_set = set()
+        value_set = ValueSet()
for item in values:
if isinstance(item, paddle.pir.OpOperand):
value_set.add(item.source())
@@ -245,7 +304,7 @@ def update_no_grad_set_after_prune(
from inputs to outputs add value not in the path to no_grad_set,
from outputs to inputs add value not in the path to no_grad_set,
'''
-    inputs_set = set(inputs)
+    inputs_set = ValueSet(inputs)
if inputs_set:
for op in block.ops:
if some_in_set(op.operands_source(), inputs_set):
@@ -258,12 +317,12 @@
if value not in inputs_set:
no_grad_set.add(value)

-    outputs_set = set(outputs)
-    no_grad_set_tmp = set()
+    outputs_set = ValueSet(outputs)
+    no_grad_set_tmp = ValueSet()
for op in reversed(effective_forward_ops):
for output in op.results():
if output not in outputs_set and not some_in_set(
-                [output], set(op.operands_source())
+                [output], ValueSet(op.operands_source())
):
no_grad_set_tmp.add(output)

@@ -317,7 +376,7 @@ def inverse_sort_op(ops):


def append_backward_ops(
-    block, effective_forward_ops, no_grad_set, backward_ops, state
+    block, effective_forward_ops, no_grad_set, backward_ops, state: State
):
'''
add grad_op in order of topological inverse sort
@@ -577,7 +636,7 @@ def update_input_grad_map(op, input_grads):


def create_backward_prune_set(inputs, outputs, no_grad_set, state):
-    outputs_set = set()
+    outputs_set = ValueSet()
for input_ in inputs:
if not input_.use_empty():
for item in input_.first_use().owner().operands_source():
@@ -586,18 +645,18 @@ def create_backward_prune_set(inputs, outputs, no_grad_set, state):
else:
            logging.warning("input provided by inputs has no use")

-    inputs_set = set()
+    inputs_set = ValueSet()
for output in outputs:
if state.value_to_valuegrad[output] != []:
inputs_set.add(state.value_to_valuegrad[output][0][0])
-    inputs_set_tmp = set()
+    inputs_set_tmp = ValueSet()
for out_grad in inputs_set:
if not out_grad.use_empty():
for item in out_grad.first_use().owner().operands_source():
inputs_set_tmp.add(item)
inputs_set.update(inputs_set_tmp)

-    no_gradvar_set = set()  # grad_value of value in no_grad_set
+    no_gradvar_set = ValueSet()  # grad_value of value in no_grad_set
for key in state.value_to_valuegrad:
if key in no_grad_set and state.value_to_valuegrad[key] != []:
no_gradvar_set.add(state.value_to_valuegrad[key][0][0])
@@ -640,8 +699,8 @@ def calc_gradient_helper(outputs, inputs, grad_outputs, no_grad_set):
grad_outputs, outputs, state
)

-    inputs_set = set(inputs)
-    outputs_set = set(complete_outputs)
+    inputs_set = ValueSet(inputs)
+    outputs_set = ValueSet(complete_outputs)
effective_forward_ops, _ = prune_ops(
block.ops, inputs_set, outputs_set, no_grad_set
)
@@ -690,7 +749,7 @@ def calc_gradient(outputs, inputs, grad_outputs, no_grad_set):
be: (1) a Value filled with 1 when the i-th element of `grad_outputs`
is None; (2) the i-th element of `grad_outputs` when the i-th element of
`grad_outputs` is a Value. Default None.
-        no_grad_set (set(Value), optional):
+        no_grad_set (list(Value)|tuple(Value), optional):
the Values whose gradients are not needed to compute. Default None.

Return:
@@ -701,7 +760,10 @@ def calc_gradient(outputs, inputs, grad_outputs, no_grad_set):
"""
# record input value and its gradient (Value to Value)
input_to_inputgrad_map = calc_gradient_helper(
-        outputs, inputs, grad_outputs=grad_outputs, no_grad_set=no_grad_set
+        outputs,
+        inputs,
+        grad_outputs=grad_outputs,
+        no_grad_set=ValueSet(no_grad_set),
)

inputgrad = []
@@ -764,7 +826,7 @@ def grad(
`inputs` are unreachable in the graph (i.e., their gradients are None),
error would be raised if allow_unused=False, or None would be returned as
their gradients if allow_unused=True. Default False.
-        no_grad_vars (Value|list(Value)|tuple(Value)|set(Value), optional):
+        no_grad_vars (Value|list(Value)|tuple(Value), optional):
the Values whose gradients are not needed to compute. Default None.

Returns:
Expand Down Expand Up @@ -794,18 +856,16 @@ def grad(
check_type(
no_grad_vars,
'no_grad_vars',
-        ((paddle.pir.Value, paddle.pir.OpResult), list, tuple, set, type(None)),
+        ((paddle.pir.Value, paddle.pir.OpResult), list, tuple, type(None)),
'paddle.autograd.ir_backward.grad',
)
outputs = _as_list(outputs)
inputs = _as_list(inputs)
grad_outputs = _as_list(grad_outputs)
    if no_grad_vars is None:
-        no_grad_set = set()
-    elif no_grad_vars is not set:
-        no_grad_set = set(no_grad_vars)
+        no_grad_set = ValueSet()
    else:
-        no_grad_set = no_grad_vars
+        no_grad_set = ValueSet(no_grad_vars)

input_grad = calc_gradient(outputs, inputs, grad_outputs, no_grad_set)

7 changes: 1 addition & 6 deletions python/paddle/base/backward.py
@@ -2653,7 +2653,6 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
(paddle.pir.Value, paddle.pir.OpResult),
list,
tuple,
-                set,
type(None),
),
'paddle.autograd.ir_backward.grad',
@@ -2662,11 +2661,7 @@
inputs = _as_list(inputs)
target_gradients = _as_list(target_gradients)
        if no_grad_set is None:
-            no_grad_set = set()
-        elif no_grad_set is not set:
-            no_grad_set = set(no_grad_set)
-        else:
-            no_grad_set = no_grad_set
+            no_grad_set = []
from paddle.autograd.ir_backward import (
calc_gradient as pir_calc_gradient,
)