[AMP OP&Test] add fp16/bf16 unittest for pool2d op #52288

Merged: 11 commits, Apr 12, 2023
28 changes: 28 additions & 0 deletions paddle/phi/kernels/funcs/pooling.cu
@@ -993,6 +993,7 @@ template class Pool2dDirectCUDAFunctor<AvgPool<float>, float>;
template class MaxPool2dGradFunctor<phi::GPUContext, float>;
template class MaxPool2dGradFunctor<phi::GPUContext, double>;
template class MaxPool2dGradFunctor<phi::GPUContext, dtype::float16>;
template class MaxPool2dGradFunctor<phi::GPUContext, dtype::bfloat16>;

template class Pool2dFunctor<phi::GPUContext, MaxPool<float>, float>;
template class Pool2dFunctor<phi::GPUContext, AvgPool<float>, float>;
@@ -1016,6 +1017,19 @@ template class Pool2dGradFunctor<phi::GPUContext,
AvgPoolGrad<dtype::float16>,
dtype::float16>;

template class Pool2dFunctor<phi::GPUContext,
MaxPool<dtype::bfloat16>,
dtype::bfloat16>;
template class Pool2dFunctor<phi::GPUContext,
AvgPool<dtype::bfloat16>,
dtype::bfloat16>;
template class Pool2dGradFunctor<phi::GPUContext,
MaxPoolGrad<dtype::bfloat16>,
dtype::bfloat16>;
template class Pool2dGradFunctor<phi::GPUContext,
AvgPoolGrad<dtype::bfloat16>,
dtype::bfloat16>;

template <typename PoolProcess, typename T>
__global__ void KernelPool3D(const int nthreads,
const T* input_data,
@@ -1863,6 +1877,7 @@ template class Pool3dDirectCUDAFunctor<AvgPool<float>, float>;
template class MaxPool3dGradFunctor<phi::GPUContext, float>;
template class MaxPool3dGradFunctor<phi::GPUContext, double>;
template class MaxPool3dGradFunctor<phi::GPUContext, dtype::float16>;
template class MaxPool3dGradFunctor<phi::GPUContext, dtype::bfloat16>;

template class Pool3dFunctor<phi::GPUContext, MaxPool<float>, float>;
template class Pool3dFunctor<phi::GPUContext, AvgPool<float>, float>;
@@ -1886,6 +1901,19 @@ template class Pool3dGradFunctor<phi::GPUContext,
AvgPoolGrad<dtype::float16>,
dtype::float16>;

template class Pool3dFunctor<phi::GPUContext,
MaxPool<dtype::bfloat16>,
dtype::bfloat16>;
template class Pool3dFunctor<phi::GPUContext,
AvgPool<dtype::bfloat16>,
dtype::bfloat16>;
template class Pool3dGradFunctor<phi::GPUContext,
MaxPoolGrad<dtype::bfloat16>,
dtype::bfloat16>;
template class Pool3dGradFunctor<phi::GPUContext,
AvgPoolGrad<dtype::bfloat16>,
dtype::bfloat16>;

template <typename T1, typename T2>
__global__ void KernelMaxPool2dWithIdx(const int nthreads,
const T1* input_data,
6 changes: 5 additions & 1 deletion paddle/phi/kernels/gpu/pool_grad_kernel.cu
@@ -14,7 +14,10 @@

#include "paddle/phi/kernels/pool_grad_kernel.h"

#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/data_type.h"
Review comment (Contributor): Remove the redundant header files.

#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/pool_grad_kernel_impl.h"

@@ -24,7 +27,8 @@ PD_REGISTER_KERNEL(pool2d_grad,
phi::Pool2dGradKernel,
float,
double,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(pool2d_double_grad,
GPU,
ALL_LAYOUT,
6 changes: 5 additions & 1 deletion paddle/phi/kernels/gpu/pool_kernel.cu
@@ -14,7 +14,10 @@

#include "paddle/phi/kernels/pool_kernel.h"

#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/pool_kernel_impl.h"

@@ -24,7 +27,8 @@ PD_REGISTER_KERNEL(pool2d,
phi::Pool2dKernel,
float,
double,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(max_pool2d_with_index,
GPU,
ALL_LAYOUT,
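With the bf16 functor instantiations and kernel registrations above in place, bfloat16 pooling becomes callable from Python. Below is a minimal sketch of how the new dtype could be exercised end to end; it is illustrative only (not part of this PR) and assumes a CUDA build of Paddle with bfloat16 casting available:

```python
import paddle

if paddle.is_compiled_with_cuda():
    paddle.set_device("gpu")
    # NCHW input cast to bfloat16; hits the newly registered pool2d kernel.
    x = paddle.randn([2, 3, 32, 32]).astype("bfloat16")
    x.stop_gradient = False
    y = paddle.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
    # The backward pass exercises Pool2dGradFunctor<..., bfloat16>.
    y.sum().backward()
    print(y.dtype, x.grad.dtype)
```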
99 changes: 96 additions & 3 deletions python/paddle/fluid/tests/unittests/test_pool2d_op.py
@@ -15,6 +15,7 @@
import unittest

import numpy as np
from eager_op_test import convert_float_to_uint16

from paddle.fluid import core
from paddle.fluid.tests.unittests.op_test import OpTest
@@ -304,7 +305,11 @@ def setUp(self):
self.init_data_format()
self.init_shape()

input = np.random.random(self.shape).astype(self.dtype)
if self.is_bfloat16_op():
input = np.random.random(self.shape).astype(np.float32)
else:
input = np.random.random(self.shape).astype(self.dtype)

output = pool2D_forward_naive(
input,
self.ksize,
@@ -317,8 +322,14 @@
self.data_format,
self.pool_type,
self.padding_algorithm,
).astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
)

if self.is_bfloat16_op():
output = output.astype(np.float32)
Review comment (Contributor): The output also needs to go through convert_float_to_uint16 (see the sketch after this hunk).

self.inputs = {'X': convert_float_to_uint16(input)}
else:
output = output.astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}

self.attrs = {
'strides': self.strides,
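For context on the review comment above: convert_float_to_uint16 stores bf16 test data as the upper 16 bits of the fp32 bit pattern in a uint16 array, which is why the bf16 test classes set self.dtype = np.uint16. A rough functional equivalent, written here only for illustration (the real helper lives in eager_op_test):

```python
import numpy as np

def convert_float_to_uint16_sketch(x: np.ndarray) -> np.ndarray:
    # bfloat16 is the top 16 bits of an IEEE-754 float32, so truncating
    # the low 16 bits of each fp32 value yields the bf16 bit pattern.
    return np.right_shift(
        x.astype(np.float32).view(np.uint32), 16
    ).astype(np.uint16)
```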
@@ -576,6 +587,40 @@ def test_check_grad(self):
globals()[cls_name] = TestFp16Case


def create_test_bf16_class(parent, check_grad=True):
@unittest.skipIf(
not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
)
class TestBf16Case(parent):
def init_kernel_type(self):
self.use_cuda = True
self.dtype = np.uint16

def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
self.check_output_with_place(
place,
atol=1e-3,
Review comment (Contributor): Just use the default atol here; there is no need to set it explicitly.

check_dygraph=(not self.use_mkldnn),
)

def test_check_grad(self):
place = core.CUDAPlace(0)
if self.pool_type != "max" and check_grad:
self.check_grad_with_place(
place,
{'X'},
'Out',
max_relative_error=0.07,
Review comment (Contributor): For max_relative_error, try the default value first; the other fp16 unit tests should also try the default values first. (A sketch along these lines follows after this class.)

check_dygraph=(not self.use_mkldnn),
)

cls_name = "{}_{}".format(parent.__name__, "Bf16Op")
TestBf16Case.__name__ = cls_name
globals()[cls_name] = TestBf16Case
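Following the two review comments above (use the default atol, and try the default max_relative_error first), a version of these checks relying on the framework defaults might look like this; a sketch of the suggested direction, not the merged code:

```python
def test_check_output(self):
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
        # Rely on OpTest's default bf16 tolerance instead of a hand-set atol.
        self.check_output_with_place(
            place, check_dygraph=(not self.use_mkldnn)
        )

def test_check_grad(self):
    place = core.CUDAPlace(0)
    if self.pool_type != "max" and check_grad:
        # Likewise, try the default max_relative_error first.
        self.check_grad_with_place(
            place, {'X'}, 'Out', check_dygraph=(not self.use_mkldnn)
        )
```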


create_test_cudnn_fp16_class(TestPool2D_Op)
create_test_cudnn_fp16_class(TestCase1, check_grad=False)
create_test_cudnn_fp16_class(TestCase2)
Expand All @@ -590,6 +635,12 @@ def test_check_grad(self):
create_test_fp16_class(TestCase4)
create_test_fp16_class(TestCase5)

create_test_bf16_class(TestPool2D_Op)
create_test_bf16_class(TestCase1, check_grad=False)
create_test_bf16_class(TestCase2)
create_test_bf16_class(TestCase3)
create_test_bf16_class(TestCase4)
create_test_bf16_class(TestCase5)
# --------------------test pool2d use ceil mode--------------------


@@ -735,6 +786,20 @@ def init_shape(self):
create_test_cudnn_fp16_class(TestCase4_AsyPadding)
create_test_cudnn_fp16_class(TestCase5_AsyPadding)

create_test_fp16_class(TestPool2D_AsyPadding)
create_test_fp16_class(TestCase1_AsyPadding, check_grad=False)
create_test_fp16_class(TestCase2_AsyPadding)
create_test_fp16_class(TestCase3_AsyPadding)
create_test_fp16_class(TestCase4_AsyPadding)
create_test_fp16_class(TestCase5_AsyPadding)

create_test_bf16_class(TestPool2D_AsyPadding)
create_test_bf16_class(TestCase1_AsyPadding, check_grad=False)
create_test_bf16_class(TestCase2_AsyPadding)
create_test_bf16_class(TestCase3_AsyPadding)
create_test_bf16_class(TestCase4_AsyPadding)
create_test_bf16_class(TestCase5_AsyPadding)

create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding)
create_test_cudnn_use_ceil_class(TestCase1_AsyPadding)

Expand Down Expand Up @@ -847,6 +912,20 @@ def init_shape(self):
create_test_cudnn_fp16_class(TestCase4_channel_last)
create_test_cudnn_fp16_class(TestCase5_channel_last)

create_test_fp16_class(TestPool2D_channel_last)
create_test_fp16_class(TestCase1_channel_last, check_grad=False)
Review comment (Contributor): Is this check_grad setting really required? It may have been set this way before because of an issue in the unit-test framework; try removing it now and re-running the test.

create_test_fp16_class(TestCase2_channel_last)
create_test_fp16_class(TestCase3_channel_last)
create_test_fp16_class(TestCase4_channel_last)
create_test_fp16_class(TestCase5_channel_last)

create_test_bf16_class(TestPool2D_channel_last)
create_test_bf16_class(TestCase1_channel_last, check_grad=False)
create_test_bf16_class(TestCase2_channel_last)
create_test_bf16_class(TestCase3_channel_last)
create_test_bf16_class(TestCase4_channel_last)
create_test_bf16_class(TestCase5_channel_last)

create_test_cudnn_use_ceil_class(TestPool2D_channel_last)
create_test_cudnn_use_ceil_class(TestCase1_channel_last)

@@ -964,6 +1043,20 @@ def init_shape(self):
create_test_cudnn_fp16_class(TestCase4_AsyPadding_channel_last)
create_test_cudnn_fp16_class(TestCase5_AsyPadding_channel_last)

create_test_fp16_class(TestPool2D_AsyPadding_channel_last)
create_test_fp16_class(TestCase1_AsyPadding_channel_last, check_grad=False)
create_test_fp16_class(TestCase2_AsyPadding_channel_last)
create_test_fp16_class(TestCase3_AsyPadding_channel_last)
create_test_fp16_class(TestCase4_AsyPadding_channel_last)
create_test_fp16_class(TestCase5_AsyPadding_channel_last)

create_test_bf16_class(TestPool2D_AsyPadding_channel_last)
create_test_bf16_class(TestCase1_AsyPadding_channel_last, check_grad=False)
create_test_bf16_class(TestCase2_AsyPadding_channel_last)
create_test_bf16_class(TestCase3_AsyPadding_channel_last)
create_test_bf16_class(TestCase4_AsyPadding_channel_last)
create_test_bf16_class(TestCase5_AsyPadding_channel_last)

create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding_channel_last)
create_test_cudnn_use_ceil_class(TestCase1_AsyPadding_channel_last)
