
Commit

Merge branch 'main' into zhiwei/codegen
ZhiweiYan-96 committed Aug 29, 2024
2 parents 5badf42 + e04c892 commit 283c6f7
Showing 4 changed files with 276 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/UnaryKernels.cpp
@@ -236,8 +236,8 @@ struct Expm1Functor {
 template <typename T>
 struct Expm1Functor<c10::complex<T>> {
   c10::complex<T> operator()(c10::complex<T> x) const {
-    auto a = std::sin(.5 * x.imag());
-    auto re = std::expm1(x.real()) * std::cos(x.imag()) - 2 * a * a;
+    auto a = std::sin(T(.5) * x.imag());
+    auto re = std::expm1(x.real()) * std::cos(x.imag()) - T(2) * a * a;
     auto im = std::exp(x.real()) * std::sin(x.imag());
     return c10::complex<T>(re, im);
   }
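
For context: the fix casts the literals .5 and 2 to T so the arithmetic stays in the input precision instead of being promoted to double, which matters on devices without fp64 support. The decomposition itself uses the identity cos(b) - 1 = -2*sin(b/2)^2 to avoid cancellation in the real part. A minimal Python sketch of the same formula (hypothetical helper expm1_complex, not part of this patch):

import cmath
import math

def expm1_complex(x: complex) -> complex:
    # Re(exp(x) - 1) = expm1(a)*cos(b) - 2*sin(b/2)**2, using the
    # identity cos(b) - 1 = -2*sin(b/2)**2 to avoid cancellation.
    # Im(exp(x) - 1) = exp(a)*sin(b).
    a, b = x.real, x.imag
    s = math.sin(0.5 * b)
    re = math.expm1(a) * math.cos(b) - 2.0 * s * s
    im = math.exp(a) * math.sin(b)
    return complex(re, im)

x = complex(1e-9, 1e-9)
print(expm1_complex(x))  # accurate near zero
print(cmath.exp(x) - 1)  # naive form loses digits to cancellation
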
3 changes: 3 additions & 0 deletions test/xpu/run_test_with_skip_arc.py
@@ -3,6 +3,7 @@
from skip_list_common import skip_dict
from skip_list_arc import skip_dict as skip_dict_specifical
from skip_list_win import skip_dict as skip_dict_win
from skip_list_win_arc import skip_dict as skip_dict_win_arc
from xpu_test_utils import launch_test


@@ -15,6 +16,8 @@
        skip_list += skip_dict_specifical[key]
    if IS_WINDOWS and key in skip_dict_win:
        skip_list += skip_dict_win[key]
    if IS_WINDOWS and key in skip_dict_win_arc:
        skip_list += skip_dict_win_arc[key]
    res += launch_test(key, skip_list)

exit_code = os.WEXITSTATUS(res)
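
How the merged skip list is consumed is up to launch_test in xpu_test_utils; one plausible shape, shown purely as a hedged sketch (the real implementation may differ), is to deselect every skipped case through a single pytest -k expression and return an os.system wait status, matching the os.WEXITSTATUS call above:

import os

def launch_test(test_file, skip_list):
    # Hypothetical sketch: deselect each skipped test by name.
    expr = " and ".join(f"not {name}" for name in skip_list)
    cmd = f'pytest -v {test_file} -k "{expr}"' if expr else f"pytest -v {test_file}"
    return os.system(cmd)  # wait status, unpacked by os.WEXITSTATUS
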
187 changes: 187 additions & 0 deletions test/xpu/skip_list_win_arc.py
@@ -0,0 +1,187 @@
skip_dict = {
# The SYCL compiler on Windows removes the following operations when
# '-cl-poison-unsupported-fp64-kernels' is enabled; skip the resulting
# Windows-specific failures.
"test_ops_xpu.py": (
"test_compare_cpu_sqrt_xpu_complex64",
"test_backward_nn_functional_adaptive_avg_pool2d_xpu_float32",
),
"test_binary_ufuncs_xpu": (
"test_batch_vs_slicing___rpow___xpu_complex64",
"test_batch_vs_slicing__refs_pow_xpu_complex64",
"test_batch_vs_slicing_pow_xpu_complex64",
"test_contig_size1___rpow___xpu_complex64",
"test_contig_size1__refs_pow_xpu_complex64",
"test_contig_size1_large_dim___rpow___xpu_complex64",
"test_contig_size1_large_dim__refs_pow_xpu_complex64",
"test_contig_size1_large_dim_pow_xpu_complex32",
"test_contig_size1_large_dim_pow_xpu_complex64",
"test_contig_size1_pow_xpu_complex32",
"test_contig_size1_pow_xpu_complex64",
"test_contig_vs_every_other___rpow___xpu_complex64",
"test_contig_vs_every_other__refs_pow_xpu_complex64",
"test_contig_vs_every_other_pow_xpu_complex32",
"test_contig_vs_every_other_pow_xpu_complex64",
"test_contig_vs_transposed___rpow___xpu_complex64",
"test_contig_vs_transposed__refs_pow_xpu_complex64",
"test_contig_vs_transposed_pow_xpu_complex32",
"test_contig_vs_transposed_pow_xpu_complex64",
"test_non_contig___rpow___xpu_complex64",
"test_non_contig__refs_pow_xpu_complex64",
"test_non_contig_expand___rpow___xpu_complex64",
"test_non_contig_expand__refs_pow_xpu_complex64",
"test_non_contig_expand_pow_xpu_complex32",
"test_non_contig_expand_pow_xpu_complex64",
"test_non_contig_index___rpow___xpu_complex64",
"test_non_contig_index__refs_pow_xpu_complex64",
"test_non_contig_index_pow_xpu_complex32",
"test_non_contig_index_pow_xpu_complex64",
"test_non_contig_pow_xpu_complex64",
),
"test_nn_xpu.py": (
"test_adaptiveavg_pool1d_shmem_xpu",
),
"test_unary_ufuncs_xpu.py": (
"test_batch_vs_slicing__refs_acos_xpu_complex64",
"test_batch_vs_slicing__refs_acosh_xpu_complex64",
"test_batch_vs_slicing__refs_log_xpu_complex64",
"test_batch_vs_slicing__refs_sqrt_xpu_complex64",
"test_batch_vs_slicing_acos_xpu_complex32",
"test_batch_vs_slicing_acos_xpu_complex64",
"test_batch_vs_slicing_acosh_xpu_complex32",
"test_batch_vs_slicing_acosh_xpu_complex64",
"test_batch_vs_slicing_log_xpu_complex32",
"test_batch_vs_slicing_log_xpu_complex64",
"test_batch_vs_slicing_sqrt_xpu_complex32",
"test_batch_vs_slicing_sqrt_xpu_complex64",
"test_batch_vs_slicing_square_xpu_complex64",
"test_contig_size1__refs_acos_xpu_complex64",
"test_contig_size1__refs_acosh_xpu_complex64",
"test_contig_size1__refs_log_xpu_complex64",
"test_contig_size1__refs_sqrt_xpu_complex64",
"test_contig_size1_acos_xpu_complex32",
"test_contig_size1_acos_xpu_complex64",
"test_contig_size1_acosh_xpu_complex32",
"test_contig_size1_acosh_xpu_complex64",
"test_contig_size1_large_dim__refs_acos_xpu_complex64",
"test_contig_size1_large_dim__refs_acosh_xpu_complex64",
"test_contig_size1_large_dim__refs_log_xpu_complex64",
"test_contig_size1_large_dim__refs_sqrt_xpu_complex64",
"test_contig_size1_large_dim_acos_xpu_complex32",
"test_contig_size1_large_dim_acos_xpu_complex64",
"test_contig_size1_large_dim_acosh_xpu_complex32",
"test_contig_size1_large_dim_acosh_xpu_complex64",
"test_contig_size1_large_dim_log_xpu_complex32",
"test_contig_size1_large_dim_log_xpu_complex64",
"test_contig_size1_large_dim_sqrt_xpu_complex32",
"test_contig_size1_large_dim_sqrt_xpu_complex64",
"test_contig_size1_large_dim_square_xpu_complex64",
"test_contig_size1_log_xpu_complex32",
"test_contig_size1_log_xpu_complex64",
"test_contig_size1_sqrt_xpu_complex32",
"test_contig_size1_sqrt_xpu_complex64",
"test_contig_size1_square_xpu_complex64",
"test_contig_vs_every_other__refs_acos_xpu_complex64",
"test_contig_vs_every_other__refs_acosh_xpu_complex64",
"test_contig_vs_every_other__refs_log_xpu_complex64",
"test_contig_vs_every_other__refs_sqrt_xpu_complex64",
"test_contig_vs_every_other_acos_xpu_complex32",
"test_contig_vs_every_other_acos_xpu_complex64",
"test_contig_vs_every_other_acosh_xpu_complex32",
"test_contig_vs_every_other_acosh_xpu_complex64",
"test_contig_vs_every_other_log_xpu_complex32",
"test_contig_vs_every_other_log_xpu_complex64",
"test_contig_vs_every_other_sqrt_xpu_complex32",
"test_contig_vs_every_other_sqrt_xpu_complex64",
"test_contig_vs_every_other_square_xpu_complex64",
"test_contig_vs_transposed__refs_acos_xpu_complex64",
"test_contig_vs_transposed__refs_acosh_xpu_complex64",
"test_contig_vs_transposed__refs_log_xpu_complex64",
"test_contig_vs_transposed__refs_sqrt_xpu_complex64",
"test_contig_vs_transposed_acos_xpu_complex32",
"test_contig_vs_transposed_acos_xpu_complex64",
"test_contig_vs_transposed_acosh_xpu_complex32",
"test_contig_vs_transposed_acosh_xpu_complex64",
"test_contig_vs_transposed_log_xpu_complex32",
"test_contig_vs_transposed_log_xpu_complex64",
"test_contig_vs_transposed_sqrt_xpu_complex32",
"test_contig_vs_transposed_sqrt_xpu_complex64",
"test_contig_vs_transposed_square_xpu_complex64",
"test_non_contig__refs_acos_xpu_complex64",
"test_non_contig__refs_acosh_xpu_complex64",
"test_non_contig__refs_log_xpu_complex64",
"test_non_contig__refs_sqrt_xpu_complex64",
"test_non_contig_acos_xpu_complex32",
"test_non_contig_acos_xpu_complex64",
"test_non_contig_acosh_xpu_complex32",
"test_non_contig_acosh_xpu_complex64",
"test_non_contig_expand__refs_acos_xpu_complex64",
"test_non_contig_expand__refs_acosh_xpu_complex64",
"test_non_contig_expand__refs_log_xpu_complex64",
"test_non_contig_expand__refs_sqrt_xpu_complex64",
"test_non_contig_expand_acos_xpu_complex32",
"test_non_contig_expand_acos_xpu_complex64",
"test_non_contig_expand_acosh_xpu_complex32",
"test_non_contig_expand_acosh_xpu_complex64",
"test_non_contig_expand_log_xpu_complex32",
"test_non_contig_expand_log_xpu_complex64",
"test_non_contig_expand_sqrt_xpu_complex32",
"test_non_contig_expand_sqrt_xpu_complex64",
"test_non_contig_expand_square_xpu_complex64",
"test_non_contig_index__refs_acos_xpu_complex64",
"test_non_contig_index__refs_acosh_xpu_complex64",
"test_non_contig_index__refs_log_xpu_complex64",
"test_non_contig_index__refs_sqrt_xpu_complex64",
"test_non_contig_index_acos_xpu_complex32",
"test_non_contig_index_acos_xpu_complex64",
"test_non_contig_index_acosh_xpu_complex32",
"test_non_contig_index_acosh_xpu_complex64",
"test_non_contig_index_log_xpu_complex32",
"test_non_contig_index_log_xpu_complex64",
"test_non_contig_index_sqrt_xpu_complex32",
"test_non_contig_index_sqrt_xpu_complex64",
"test_non_contig_index_square_xpu_complex64",
"test_non_contig_log_xpu_complex32",
"test_non_contig_log_xpu_complex64",
"test_non_contig_sqrt_xpu_complex32",
"test_non_contig_sqrt_xpu_complex64",
"test_non_contig_square_xpu_complex64",
"test_reference_numerics_extremal__refs_sqrt_xpu_complex64",
"test_reference_numerics_extremal_sqrt_xpu_complex64",
"test_reference_numerics_large__refs_acos_xpu_complex64",
"test_reference_numerics_large__refs_log_xpu_complex64",
"test_reference_numerics_large__refs_sqrt_xpu_complex64",
"test_reference_numerics_large_acos_xpu_complex32",
"test_reference_numerics_large_acos_xpu_complex64",
"test_reference_numerics_large_acosh_xpu_complex32",
"test_reference_numerics_large_log_xpu_complex32",
"test_reference_numerics_large_log_xpu_complex64",
"test_reference_numerics_large_sqrt_xpu_complex32",
"test_reference_numerics_large_sqrt_xpu_complex64",
"test_reference_numerics_normal__refs_acos_xpu_complex64",
"test_reference_numerics_normal__refs_acosh_xpu_complex64",
"test_reference_numerics_normal__refs_log_xpu_complex64",
"test_reference_numerics_normal__refs_sqrt_xpu_complex64",
"test_reference_numerics_normal_acos_xpu_complex32",
"test_reference_numerics_normal_acos_xpu_complex64",
"test_reference_numerics_normal_acosh_xpu_complex32",
"test_reference_numerics_normal_acosh_xpu_complex64",
"test_reference_numerics_normal_log_xpu_complex32",
"test_reference_numerics_normal_log_xpu_complex64",
"test_reference_numerics_normal_sqrt_xpu_complex32",
"test_reference_numerics_normal_sqrt_xpu_complex64",
"test_reference_numerics_normal_square_xpu_complex64",
"test_reference_numerics_small__refs_acos_xpu_complex64",
"test_reference_numerics_small__refs_acosh_xpu_complex64",
"test_reference_numerics_small__refs_log_xpu_complex64",
"test_reference_numerics_small__refs_sqrt_xpu_complex64",
"test_reference_numerics_small_acos_xpu_complex32",
"test_reference_numerics_small_acos_xpu_complex64",
"test_reference_numerics_small_acosh_xpu_complex32",
"test_reference_numerics_small_acosh_xpu_complex64",
"test_reference_numerics_small_log_xpu_complex32",
"test_reference_numerics_small_log_xpu_complex64",
"test_reference_numerics_small_sqrt_xpu_complex32",
"test_reference_numerics_small_sqrt_xpu_complex64",
"test_reference_numerics_small_square_xpu_complex64",
),
}
84 changes: 84 additions & 0 deletions test/xpu/xpu_test_utils.py
@@ -569,6 +569,74 @@ def convert_dtype(obj, dtype, requires_grad=False):

CriterionTest.test_cuda = CriterionTest_test_xpu

from torch.testing._internal.common_methods_invocations import sample_inputs_cat_concat, S, M
from torch.testing._internal.common_methods_invocations import make_tensor
from functools import partial
from torch.testing._internal.opinfo.core import SampleInput

def reference_inputs_cat_nofp64(op, device, dtype, requires_grad, **kwargs):
    yield from sample_inputs_cat_concat(op, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Noncontiguous type-promoting tensors
    a = make_arg((3, 4, 2))
    # Original used dtype=torch.double here; use fp32 on platforms without fp64 support:
    # b = make_arg((3, 2, 2), noncontiguous=True, dtype=torch.double)
    b = make_arg((3, 2, 2), noncontiguous=True, dtype=torch.float)
    c = make_arg((3, 3, 2), dtype=torch.float16).permute(1, 0, 2)

    yield SampleInput((a, b, c), kwargs={'dim': 1})

    # Special case: 1D tensor with a dim length of 0
    a = make_arg((0,))
    b = make_arg((3, 2, 2))

    yield SampleInput((a, b, a))
    yield SampleInput((a, a, a))
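
The mixed-dtype case above can be sanity-checked with public torch APIs alone; a small sketch (shapes mirror the sampler, and the fp16 operand promotes to fp32):

import torch

a = torch.randn(3, 4, 2, dtype=torch.float32)
b = torch.randn(3, 2, 2, dtype=torch.float32).transpose(1, 2)  # noncontiguous, same shape
c = torch.randn(3, 3, 2, dtype=torch.float16).permute(1, 0, 2)
out = torch.cat((a, b, c), dim=1)  # type promotion: fp16 -> fp32
print(out.shape, out.dtype)  # torch.Size([3, 9, 2]) torch.float32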


def index_variable_nofp64(shape, max_indices, device=torch.device('cpu')):
    if not isinstance(shape, tuple):
        shape = (shape,)
    # Original used dtype=torch.double; use fp32 on platforms without fp64 support:
    # index = torch.rand(*shape, dtype=torch.double, device=device).mul_(max_indices).floor_().long()
    index = torch.rand(*shape, dtype=torch.float32, device=device).mul_(max_indices).floor_().long()
    return index
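
A quick check that the fp32 variant still yields valid integer indices (uses the helper defined above):

import torch

idx = index_variable_nofp64((2, 3), max_indices=5)
print(idx.dtype)  # torch.int64
print(bool((idx >= 0).all() and (idx < 5).all()))  # True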


def sample_inputs_softmax_variant_nofp64(
    op_info,
    device,
    dtype,
    requires_grad,
    with_dtype=False,
    use_zero_dimensions=True,
    **kwargs,
):
    make_arg = partial(
        make_tensor, device=device, dtype=dtype, requires_grad=requires_grad
    )
    cases = [
        ((S,), (0,)),
        ((S, S), (0,)),
        ((S, S), (1,)),
        ((S, S), (-1,)),
        ((S, M, S), (2,)),
        *([((S, 0, 0), (-1,))] if use_zero_dimensions else []),
    ]
    # Original used dtype=torch.float64; use fp32 on platforms without fp64 support:
    # kwargs = dict(dtype=torch.float64) if with_dtype else None
    kwargs = dict(dtype=torch.float32) if with_dtype else None

    # PyTorch on XLA throws an error when a dim argument is passed for a 0d tensor.
    # See https://github.com/pytorch/xla/issues/3061 for more details.
    if torch.device(device).type != "xla":
        cases.append(((), (0,)))

    return (
        SampleInput(make_arg(shape), args=dim, kwargs=kwargs) for shape, dim in cases
    )
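
A minimal smoke test for the sampler above (a sketch; op_info is unused by this sampler, so None suffices, and CPU keeps it self-contained):

import torch

samples = sample_inputs_softmax_variant_nofp64(
    None, "cpu", torch.float32, requires_grad=False, with_dtype=True
)
for s in samples:
    out = torch.softmax(s.input, *s.args, **(s.kwargs or {}))
    assert out.dtype == torch.float32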

class XPUPatchForImport:
    def __init__(self, patch_test_case=True) -> None:
@@ -603,6 +671,11 @@ def __init__(self, patch_test_case=True) -> None:
        self.cuda_is_available = cuda.is_available
        self.cuda_is_bf16_supported = cuda.is_bf16_supported

        if "has_fp64=0" in str(torch.xpu.get_device_properties(0)):
            self.sample_inputs_softmax_variant = common_methods_invocations.sample_inputs_softmax_variant
            self.index_variable = common_methods_invocations.index_variable
            self.reference_inputs_cat = common_methods_invocations.reference_inputs_cat

    def align_db_decorators(self, db):
        def gen_xpu_wrappers(op_name, wrappers):
            wrapper_xpu = []
@@ -669,6 +742,11 @@ def __enter__(self):

        common_device_type.onlyCUDA = common_device_type.onlyXPU

if "has_fp64=0" in str(torch.xpu.get_device_properties(0)):
common_methods_invocations.sample_inputs_softmax_variant = sample_inputs_softmax_variant_nofp64
common_methods_invocations.index_variable = index_variable_nofp64
common_methods_invocations.reference_inputs_cat = reference_inputs_cat_nofp64

        class dtypesIfXPU(common_device_type.dtypes):
            def __init__(self, *args):
                super().__init__(*args, device_type="xpu")
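
The "has_fp64=0" substring check above is what gates all three patches; a hedged helper form of the same test (hypothetical name device_lacks_fp64, assumes an XPU-enabled PyTorch build):

import torch

def device_lacks_fp64(index: int = 0) -> bool:
    # str(torch.xpu.get_device_properties(...)) includes "has_fp64=0"
    # on devices without native double support, e.g. Intel Arc GPUs.
    return "has_fp64=0" in str(torch.xpu.get_device_properties(index))
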
@@ -768,6 +846,7 @@ def __init__(self, *args):
        cuda.is_bf16_supported = lambda: True

        sys.path.extend(self.test_package)

        return self

def __exit__(self, exc_type, exc_value, traceback):
@@ -790,6 +869,11 @@ def __exit__(self, exc_type, exc_value, traceback):
        cuda.is_available = self.cuda_is_available
        cuda.is_bf16_supported = self.cuda_is_bf16_supported

if "has_fp64=0" in str(torch.xpu.get_device_properties(0)):
common_methods_invocations.sample_inputs_softmax_variant = self.sample_inputs_softmax_variant
common_methods_invocations.index_variable = self.index_variable
common_methods_invocations.reference_inputs_cat = self.reference_inputs_cat


# Copy the test cases from generic_base_class to generic_test_class.
# It serves to reuse test cases. Regarding some newly added hardware,
