Adjust tolerance with modi grad #51791

Merged Mar 20, 2023 (23 commits)
184 changes: 165 additions & 19 deletions python/paddle/fluid/tests/unittests/eager_op_test.py
@@ -333,6 +333,7 @@ def setUpClass(cls):
cls.dtype = None
cls.outputs = {}
cls.input_shape_is_large = True
cls.is_calc_ref = False
cls.check_prim = False

np.random.seed(123)
@@ -464,6 +465,7 @@ def is_float16_op(self):
# Make sure this function is called after calling infer_dtype_from_inputs_outputs.
return (
self.dtype == np.float16
or self.dtype == "float16"
or (
hasattr(self, 'output_dtype')
and self.output_dtype == np.float16
@@ -493,6 +495,18 @@ def is_xpu_op(self):
and self.attrs["use_xpu"]
)

def is_fp16_compared_with_fp32(self):
return self.is_float16_op() and (
self.op_type
not in op_accuracy_white_list.NO_FP16_COMPARED_WITH_FP32_OP_LIST
)

def enable_cal_ref_output(self):
self.is_calc_ref = self.is_fp16_compared_with_fp32()

def disable_cal_ref_output(self):
self.is_calc_ref = False
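
Taken together, is_fp16_compared_with_fp32 and the enable_cal_ref_output / disable_cal_ref_output switches let a checker compute a second, float32 "reference" output for a float16 op and compare the two (see the calculate_output hunks further down). The following is a minimal, self-contained sketch of that flow, not part of the diff: the _FakeOpTest class and its _calc_output signature are illustrative stand-ins, not Paddle's API; only the control flow mirrors the diff.

import numpy as np

# Hedged sketch: how the new toggles drive a float32 reference run.
class _FakeOpTest:
    is_calc_ref = False

    def is_fp16_compared_with_fp32(self):
        return True  # pretend: a float16 op that is not on the skip list

    def enable_cal_ref_output(self):
        self.is_calc_ref = self.is_fp16_compared_with_fp32()

    def disable_cal_ref_output(self):
        self.is_calc_ref = False

    def _calc_output(self, x):
        # With is_calc_ref set, float16 inputs are promoted to float32 so
        # the reference result is computed in full precision.
        if self.is_calc_ref and x.dtype == np.float16:
            x = x.astype(np.float32)
        return x * 2

op_test = _FakeOpTest()
x = np.random.rand(4).astype(np.float16)
actual = op_test._calc_output(x)      # float16 output under test
op_test.enable_cal_ref_output()
expect = op_test._calc_output(x)      # float32 reference output
op_test.disable_cal_ref_output()
np.testing.assert_allclose(actual, expect, rtol=1e-3)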

# set the self.output_dtype .
def infer_dtype_from_inputs_outputs(self, inputs, outputs):
def is_np_data(input):
@@ -566,19 +580,49 @@ def feed_var(self, input_vars, place):
tensor = core.LoDTensor()
if isinstance(np_value, tuple):
tensor.set(np_value[0], place)
tensor.set_recursive_sequence_lengths(np_value[1])
dtype = np.array(np_value[1]).dtype
if self.is_calc_ref and dtype == np.float16:
if isinstance(np_value[1], list):
tensor.set_recursive_sequence_lengths(
np.array(np_value[1]).astype(np.float32)
)
else:
tensor.set_recursive_sequence_lengths(
np_value[1].astype(np.float32)
)
else:
tensor.set_recursive_sequence_lengths(np_value[1])
else:
tensor.set(np_value, place)
if self.is_calc_ref and np_value.dtype == np.float16:
tensor.set(np_value.astype(np.float32), place)
else:
tensor.set(np_value, place)
feed_map[name] = tensor
else:
tensor = core.LoDTensor()
if isinstance(self.inputs[var_name], tuple):
tensor.set(self.inputs[var_name][0], place)
tensor.set_recursive_sequence_lengths(
self.inputs[var_name][1]
)
if (
self.is_calc_ref
and self.inputs[var_name][1].dtype == np.float16
):
tensor.set_recursive_sequence_lengths(
self.inputs[var_name][1].astype(np.float32)
)
else:
tensor.set_recursive_sequence_lengths(
self.inputs[var_name][1]
)
else:
tensor.set(self.inputs[var_name], place)
if (
self.is_calc_ref
and self.inputs[var_name].dtype == np.float16
):
tensor.set(
self.inputs[var_name].astype(np.float32), place
)
else:
tensor.set(self.inputs[var_name], place)
feed_map[var_name] = tensor

return feed_map
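
The net effect of the feed_var changes above: while is_calc_ref is set, any float16 numpy input (and, for LoD inputs, its value part) is up-cast to float32 before being placed on the LoDTensor, so the reference program runs in full precision. A reduced sketch of that cast follows; the helper name _set_feed_value is invented for illustration, and tensor is assumed to behave like the core.LoDTensor in the diff.

import numpy as np

# Hedged sketch of the up-cast applied in feed_var (helper name is invented).
def _set_feed_value(tensor, np_value, place, is_calc_ref):
    # Promote float16 inputs to float32 only while the reference pass runs;
    # otherwise feed the value unchanged.
    if is_calc_ref and np_value.dtype == np.float16:
        tensor.set(np_value.astype(np.float32), place)
    else:
        tensor.set(np_value, place)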
@@ -602,10 +646,10 @@ def _append_ops(self, block):
else:
self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
inputs = append_input_output(
block, op_proto, self.inputs, True, self.dtype
block, op_proto, self.inputs, True, self.dtype, self.is_calc_ref
)
outputs = append_input_output(
block, op_proto, self.outputs, False, self.dtype
block, op_proto, self.outputs, False, self.dtype, self.is_calc_ref
)

if hasattr(self, "cache_name_list"):
@@ -725,7 +769,13 @@ def get_sequence_instance_size_0_input(self, lod=None, shape=None):
def append_input_output_for_dygraph(
self, op_proto, np_list, is_input, if_return_inputs_grad_dict, block
):
def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
def create_var(
np_value,
name,
is_input,
if_return_inputs_grad_dict,
is_calc_ref=False,
):
np_value_temp = np_value
has_lod = False
lod_temp = None
@@ -735,7 +785,13 @@ def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
lod_temp = np_value[1]

if is_input:
v = self._create_var_from_numpy(np_value_temp)
if self.is_calc_ref and np_value_temp.dtype == np.float16:
v = self._create_var_from_numpy(
np_value_temp.astype(np.float32)
)
else:
v = self._create_var_from_numpy(np_value_temp)

if if_return_inputs_grad_dict:
v.stop_gradient = False
v.retain_grads()
@@ -745,13 +801,22 @@ def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
lod_temp
)
else:
v = block.create_var(
name=name,
dtype=np_value_temp.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False,
)
if self.is_calc_ref and np_value_temp.dtype == np.float16:
v = block.create_var(
name=name,
dtype=np.float32,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False,
)
else:
v = block.create_var(
name=name,
dtype=np_value_temp.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False,
)
return v

# prepare variable for input or output
@@ -780,7 +845,11 @@ def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
slot_name = name
for (name, np_value) in np_list[name]:
v = create_var(
np_value, name, is_input, if_return_inputs_grad_dict
np_value,
name,
is_input,
if_return_inputs_grad_dict,
self.is_calc_ref,
)
var_list.append(v)
if if_return_inputs_grad_dict:
@@ -800,6 +869,7 @@ def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
name_temp,
is_input,
if_return_inputs_grad_dict,
self.is_calc_ref,
)
var_dict[name].append(v)
if if_return_inputs_grad_dict:
@@ -1443,6 +1513,19 @@ def find_imperative_actual(target_name, dygraph_outs, place):
"Found failed {} {}".format(dygraph_outs.keys(), target_name),
)

def find_imperative_expect(target_name, dygraph_outs, place):
for name in dygraph_outs:
if name == target_name:
return dygraph_outs[name][0]
var_list = dygraph_outs[name]
for i, var in enumerate(var_list):
if var.name == target_name:
return dygraph_outs[name][i]
self.assertTrue(
False,
"Found failed {} {}".format(dygraph_outs.keys(), target_name),
)

def find_actual(target_name, fetch_list):
found = [
i
@@ -1454,6 +1537,17 @@ def find_actual(target_name, fetch_list):
)
return found[0]

def find_expect(target_name, fetch_list):
found = [
i
for i, var_name in enumerate(fetch_list)
if var_name == target_name
]
self.assertTrue(
len(found) == 1, "Found {} {}".format(len(found), target_name)
)
return found[0]
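
find_expect mirrors find_actual, just over the reference fetch list and reference outputs: it returns the unique index of the requested output name. A self-contained restatement of the lookup, for illustration only (the helper name _find and the list contents are invented):

# Hedged sketch of what find_actual/find_expect do: map an output name to
# its unique position in a fetch list, failing if it is missing or repeated.
def _find(target_name, fetch_list):
    found = [i for i, var_name in enumerate(fetch_list) if var_name == target_name]
    assert len(found) == 1, "Found {} {}".format(len(found), target_name)
    return found[0]

ref_fetch_list = ['Out', 'Intermediate']   # hypothetical reference fetch list
assert _find('Out', ref_fetch_list) == 0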

class Checker:
"""base class for check with self.outputs.
currently don't support check between checkers.
@@ -1491,6 +1585,10 @@ def find_actual_value(self, name):
"""return: (actual_tensor(var_base), actual_numpy)"""
raise NotImplementedError("base class, not implement!")

def find_expect_value(self, name):
"""return: (expect_tensor(var_base), actual_numpy)"""
raise NotImplementedError("base class, not implement!")

def _compare_numpy(self, name, actual_np, expect_np):
self.op_test.assertTrue(
np.allclose(
@@ -1514,7 +1612,13 @@ def _compare_list(self, name, actual, expect):

def compare_single_output_with_expect(self, name, expect):
actual, actual_np = self.find_actual_value(name)
expect_np = expect[0] if isinstance(expect, tuple) else expect
# expect_np = expect[0] if isinstance(expect, tuple) else expect
if self.op_test.is_fp16_compared_with_fp32():
expect, expect_np = self.find_expect_value(name)
else:
expect_np = (
expect[0] if isinstance(expect, tuple) else expect
)
actual_np, expect_np = self.convert_uint16_to_float_ifneed(
actual_np, expect_np
)
@@ -1566,13 +1670,27 @@ def calculate_output(self):
)
self.outputs = outs
self.fetch_list = fetch_list
if self.op_test.is_fp16_compared_with_fp32():
self.op_test.enable_cal_ref_output()
ref_outs, ref_fetch_list = self.op_test._calc_output(
place, no_check_set=no_check_set
)
self.op_test.disable_cal_ref_output()
self.ref_outputs = ref_outs
self.ref_fetch_list = ref_fetch_list

def find_actual_value(self, name):
idx = find_actual(name, self.fetch_list)
actual = self.outputs[idx]
actual_t = np.array(actual)
return actual, actual_t

def find_expect_value(self, name):
idx = find_expect(name, self.ref_fetch_list)
expect = self.ref_outputs[idx]
expect_t = np.array(expect)
return expect, expect_t

def convert_uint16_to_float_ifneed(self, actual_np, expect_np):
"""
judge whether convert current output and expect to uint16.
@@ -1584,6 +1702,8 @@ def convert_uint16_to_float_ifneed(self, actual_np, expect_np):
]:
actual_np = convert_uint16_to_float(actual_np)
self.rtol = 1.0e-2
elif actual_np.dtype == np.float16:
self.rtol = 1.0e-3
else:
self.rtol = 1.0e-5
if (
@@ -1620,6 +1740,20 @@ def calculate_output(self):
)
self.outputs = dygraph_outs

if self.op_test.is_fp16_compared_with_fp32():
self.op_test.enable_cal_ref_output()
self.is_python_api_test = True
ref_dygraph_outs = self.op_test._calc_python_api_output(
place
)
if ref_dygraph_outs is None:
self.is_python_api_test = False
ref_dygraph_outs = self.op_test._calc_dygraph_output(
place, no_check_set=no_check_set
)
self.ref_outputs = ref_dygraph_outs
self.op_test.disable_cal_ref_output()

def _compare_numpy(self, name, actual_np, expect_np):
if (
functools.reduce(lambda x, y: x * y, actual_np.shape, 1)
@@ -1651,6 +1785,8 @@ def convert_uint16_to_float_ifneed(self, actual_np, expect_np):
np.float64,
]:
self.rtol = 1.0e-2
elif actual_np.dtype == np.float16:
self.rtol = 1.0e-3
else:
self.rtol = 1.0e-5
if self.op_test.is_bfloat16_op():
@@ -1670,6 +1806,16 @@ def find_actual_value(self, name):
)
return imperative_actual, imperative_actual_t

def find_expect_value(self, name):
with fluid.dygraph.base.guard(place=place):
imperative_expect = find_imperative_expect(
name, self.ref_outputs, place
)
imperative_expect_t = np.array(
imperative_expect.value().get_tensor()
)
return imperative_expect, imperative_expect_t
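
The rtol adjustments in the two convert_uint16_to_float_ifneed hunks above are the tolerance change the PR title refers to: uint16/bfloat16 outputs are compared at 1e-2, float16 outputs at the new 1e-3, and everything else stays at 1e-5. The following is a condensed, hedged restatement, not part of the diff; needs_uint16_conversion is a placeholder for the uint16 guard that the hunks above elide.

import numpy as np

# Hedged restatement of the rtol selection used by both checkers.
def _select_rtol(actual_dtype, needs_uint16_conversion=False):
    if needs_uint16_conversion:
        return 1.0e-2   # bfloat16 carried as uint16, loosest tolerance
    elif np.dtype(actual_dtype) == np.float16:
        return 1.0e-3   # new in this PR: float16 output vs. float32 reference
    else:
        return 1.0e-5   # unchanged default

assert _select_rtol(np.float16) == 1.0e-3
assert _select_rtol(np.float64) == 1.0e-5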

def _compare_list(self, name, actual, expect):
"""if expect is a tuple, we need to compare list."""
with fluid.dygraph.base.guard(place=place):
3 changes: 2 additions & 1 deletion python/paddle/fluid/tests/unittests/op_test.py
@@ -465,6 +465,7 @@ def is_float16_op(self):
# Make sure this function is called after calling infer_dtype_from_inputs_outputs.
return (
self.dtype == np.float16
or self.dtype == "float16"
or (
hasattr(self, 'output_dtype')
and self.output_dtype == np.float16
@@ -1875,7 +1876,7 @@ def find_actual_value(self, name):
with _test_eager_guard():
return super().find_actual_value(name)

def find_expect_valur(self, name):
def find_expect_value(self, name):
with _test_eager_guard():
return super().find_expect_value(name)
