diff --git a/src/ATen/native/xpu/Activation.cpp b/src/ATen/native/xpu/Activation.cpp index cc9b7319a..96c1a2bf9 100644 --- a/src/ATen/native/xpu/Activation.cpp +++ b/src/ATen/native/xpu/Activation.cpp @@ -7,7 +7,6 @@ #include #include -// #include #include #include @@ -15,9 +14,6 @@ namespace at { namespace native { REGISTER_XPU_DISPATCH(threshold_stub, xpu::threshold_kernel); -// REGISTER_XPU_DISPATCH(GeluKernel, xpu::gelu_kernel); -// REGISTER_XPU_DISPATCH(GeluBackwardKernel, xpu::gelu_backward_kernel); - TORCH_IMPL_FUNC(gelu_backward_out_xpu) (const Tensor& /*grad*/, const Tensor& /*self*/, @@ -34,160 +30,4 @@ TORCH_IMPL_FUNC(gelu_out_xpu) } } // namespace native - -// Tensor XPUNativeFunctions::relu(const Tensor& self) { -// TORCH_CHECK( -// self.scalar_type() != at::kBool, "Boolean inputs not supported for -// relu"); -// return at::clamp_min(self, 0); -// } - -// Tensor& XPUNativeFunctions::relu_(Tensor& self) { -// TORCH_CHECK( -// self.scalar_type() != at::kBool, "Boolean inputs not supported for -// relu"); -// return at::clamp_min_(self, 0); -// } - -// Tensor& XPUNativeFunctions::relu_out(const Tensor& self, Tensor& out) { -// TORCH_CHECK( -// self.scalar_type() != at::kBool, "Boolean inputs not supported for -// relu"); -// return at::clamp_min_out(out, self, 0); -// } - -// TensorIterator threshold_meta( -// const Tensor& self, -// const Scalar& threshold, -// const Scalar& value, -// Tensor& out) { -// TensorIterator iter; -// iter.build(TensorIteratorConfig() -// .set_check_mem_overlap( -// false) // threshold is idempotent, so overlap is okay -// .add_output(out) -// .add_const_input(self) -// .add_const_input(self) // other -// .allow_cpu_scalars(true) -// .promote_inputs_to_common_dtype(true) -// .cast_common_dtype_to_outputs(true) -// .enforce_safe_casting_to_output(true)); -// return iter; -// } - -// Tensor XPUNativeFunctions::threshold( -// const Tensor& self, -// const Scalar& threshold, -// const Scalar& value) { -// Tensor out; -// auto iter = threshold_meta(self, threshold, value, out); -// native::xpu::threshold_kernel(iter, threshold, value); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::threshold_( -// Tensor& self, -// const Scalar& threshold, -// const Scalar& value) { -// auto iter = threshold_meta(self, threshold, value, self); -// native::xpu::threshold_kernel(iter, threshold, value); -// return self; -// } - -// Tensor& XPUNativeFunctions::threshold_out( -// const Tensor& self, -// const Scalar& threshold, -// const Scalar& value, -// Tensor& out) { -// auto iter = threshold_meta(self, threshold, value, out); -// native::xpu::threshold_kernel(iter, threshold, value); -// return out; -// } - -// TensorIterator threshold_backward_meta( -// const Tensor& grad, -// const Tensor& self, -// const Scalar& threshold, -// Tensor& gradInput) { -// TensorIterator iter; -// iter.build(TensorIteratorConfig() -// .set_check_mem_overlap( -// false) // threshold is idempotent, so overlap is okay -// .add_output(gradInput) -// .add_input(self) -// .add_input(grad) // other -// .allow_cpu_scalars(true) -// .promote_inputs_to_common_dtype(true) -// .cast_common_dtype_to_outputs(true) -// .enforce_safe_casting_to_output(true)); -// return iter; -// } - -// Tensor XPUNativeFunctions::threshold_backward( -// const Tensor& grad_output, -// const Tensor& self, -// const Scalar& threshold) { -// Tensor grad_input; -// auto iter = threshold_backward_meta(grad_output, self, threshold, -// grad_input); native::xpu::threshold_kernel(iter, threshold, 0); return -// 
iter.output(); -// } - -// Tensor& XPUNativeFunctions::threshold_backward_out( -// const Tensor& grad_output, -// const Tensor& self, -// const Scalar& threshold, -// Tensor& grad_input) { -// auto iter = threshold_backward_meta(grad_output, self, threshold, -// grad_input); native::xpu::threshold_kernel(iter, threshold, 0); return -// grad_input; -// } - -// Tensor XPUNativeFunctions::gelu( -// const Tensor& self, -// c10::string_view approximate) { -// Tensor out; -// auto iter = TensorIterator::unary_op(out, self); -// native::xpu::gelu_kernel(iter, approximate); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::gelu_(Tensor& self, c10::string_view approximate) -// { -// auto iter = TensorIterator::unary_op(self, self); -// native::xpu::gelu_kernel(iter, approximate); -// return self; -// } - -// Tensor& XPUNativeFunctions::gelu_out( -// const Tensor& self, -// c10::string_view approximate, -// Tensor& out) { -// auto iter = TensorIterator::unary_op(out, self); -// native::xpu::gelu_kernel(iter, approximate); -// return out; -// } - -// Tensor XPUNativeFunctions::gelu_backward( -// const Tensor& grad_output, -// const Tensor& self, -// c10::string_view approximate) { -// Tensor grad_input; -// auto iter = -// TensorIterator::borrowing_binary_op(grad_input, grad_output, self); -// native::xpu::gelu_backward_kernel(iter, approximate); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::gelu_backward_out( -// const Tensor& grad_output, -// const Tensor& self, -// c10::string_view approximate, -// Tensor& grad_input) { -// auto iter = -// TensorIterator::borrowing_binary_op(grad_input, grad_output, self); -// native::xpu::gelu_backward_kernel(iter, approximate); -// return grad_input; -// } - } // namespace at diff --git a/src/ATen/native/xpu/BinaryOps.cpp b/src/ATen/native/xpu/BinaryOps.cpp index f83e29697..1762cbd9d 100644 --- a/src/ATen/native/xpu/BinaryOps.cpp +++ b/src/ATen/native/xpu/BinaryOps.cpp @@ -33,574 +33,4 @@ REGISTER_XPU_DISPATCH(minimum_stub, xpu::minimum_kernel); REGISTER_XPU_DISPATCH(sigmoid_backward_stub, xpu::sigmoid_backward_kernel); } // namespace native - -// Tensor XPUNativeFunctions::add( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha) { -// Tensor out; -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::alpha_check(iter.dtype(), alpha); -// native::xpu::add_kernel(iter, alpha); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::add_( -// Tensor& self, -// const Tensor& other, -// const Scalar& alpha) { -// auto iter = TensorIterator::borrowing_binary_op(self, self, other); -// native::alpha_check(iter.dtype(), alpha); -// native::xpu::add_kernel(iter, alpha); -// return self; -// } - -// Tensor& XPUNativeFunctions::add_out( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::alpha_check(iter.dtype(), alpha); -// native::xpu::add_kernel(iter, alpha); -// return out; -// } - -// Tensor XPUNativeFunctions::add( -// const Tensor& self, -// const Scalar& other, -// const Scalar& alpha) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::add(self, wrapper, alpha); -// } - -// Tensor& XPUNativeFunctions::add_( -// Tensor& self, -// const Scalar& other, -// const Scalar& alpha) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::add_(self, wrapper, alpha); -// } - -// Tensor& 
XPUNativeFunctions::add_out( -// const Tensor& self, -// const Scalar& other, -// const Scalar& alpha, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::add_out(self, wrapper, alpha, out); -// } - -// Tensor XPUNativeFunctions::sub( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha) { -// Tensor out; -// native::sub_check(self, other); -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::alpha_check(iter.dtype(), alpha); -// native::xpu::sub_kernel(iter, alpha); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::sub_( -// Tensor& self, -// const Tensor& other, -// const Scalar& alpha) { -// native::sub_check(self, other); -// auto iter = TensorIterator::borrowing_binary_op(self, self, other); -// native::alpha_check(iter.dtype(), alpha); -// native::xpu::sub_kernel(iter, alpha); -// return self; -// } - -// Tensor& XPUNativeFunctions::sub_out( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha, -// Tensor& out) { -// native::sub_check(self, other); -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::alpha_check(iter.dtype(), alpha); -// native::xpu::sub_kernel(iter, alpha); -// return out; -// } - -// Tensor XPUNativeFunctions::sub( -// const Tensor& self, -// const Scalar& other, -// const Scalar& alpha) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::sub(self, wrapper, alpha); -// } - -// Tensor& XPUNativeFunctions::sub_( -// Tensor& self, -// const Scalar& other, -// const Scalar& alpha) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::sub_(self, wrapper, alpha); -// } - -// Tensor& XPUNativeFunctions::sub_out( -// const Tensor& self, -// const Scalar& other, -// const Scalar& alpha, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::sub_out(self, wrapper, alpha, out); -// } - -// Tensor XPUNativeFunctions::mul(const Tensor& self, const Tensor& other) { -// Tensor out; -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::mul_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::mul_(Tensor& self, const Tensor& other) { -// auto iter = TensorIterator::borrowing_binary_op(self, self, other); -// native::xpu::mul_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::mul_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::mul_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::mul(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::mul(self, wrapper); -// } - -// Tensor& XPUNativeFunctions::mul_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::mul_(self, wrapper); -// } - -// Tensor XPUNativeFunctions::div(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_binary_float_op(out, self, other); -// native::xpu::div_true_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::div_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_binary_float_op(self, self, other); -// native::xpu::div_true_kernel(iter); -// return 
self; -// } - -// Tensor& XPUNativeFunctions::div_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_binary_float_op(out, self, other); -// native::xpu::div_true_kernel(iter); -// return out; -// } -// Tensor& XPUNativeFunctions::mul_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::mul_out(self, wrapper, out); -// } - -// Tensor XPUNativeFunctions::div(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_binary_float_op(out, self, other); -// native::xpu::div_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::div_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_binary_float_op(self, self, other); -// native::xpu::div_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::div_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_binary_float_op(out, self, other); -// native::xpu::div_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::div(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::div(self, wrapper); -// } - -// Tensor& XPUNativeFunctions::div_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::div_(self, wrapper); -// } - -// static inline TensorIterator meta_func_div_Tensor_mode( -// const Tensor& self, -// const Tensor& other, -// const Tensor& output, -// c10::optional rounding_mode) { -// TensorIterator iter; -// if (!rounding_mode.has_value()) { -// iter.build_borrowing_binary_float_op(output, self, other); -// // NOLINTNEXTLINE(bugprone-branch-clone) -// } else if (*rounding_mode == "trunc") { -// iter.build_borrowing_binary_op(output, self, other); -// } else if (*rounding_mode == "floor") { -// iter.build_borrowing_binary_op(output, self, other); -// } else { -// TORCH_CHECK( -// false, -// "div expected rounding_mode to be one of None, 'trunc', or 'floor' -// " "but found '", *rounding_mode, -// "'"); -// } -// return iter; -// } - -// static inline void impl_func_div_Tensor_mode( -// TensorIterator& iter, -// ::std::optional rounding_mode) { -// if (!rounding_mode.has_value()) { -// native::xpu::div_true_kernel(iter); -// } else if (*rounding_mode == "trunc") { -// native::xpu::div_trunc_kernel(iter); -// } else if (*rounding_mode == "floor") { -// native::xpu::div_floor_kernel(iter); -// } -// } - -// Tensor XPUNativeFunctions::div( -// const at::Tensor& self, -// const at::Tensor& other, -// ::std::optional rounding_mode) { -// Tensor output; -// TensorIterator iter = -// meta_func_div_Tensor_mode(self, other, output, rounding_mode); -// impl_func_div_Tensor_mode(iter, rounding_mode); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::div_( -// at::Tensor& self, -// const at::Tensor& other, -// ::std::optional rounding_mode) { -// TensorIterator iter = -// meta_func_div_Tensor_mode(self, other, self, rounding_mode); -// impl_func_div_Tensor_mode(iter, rounding_mode); -// return self; -// } - -// Tensor& XPUNativeFunctions::div_out( -// const at::Tensor& self, -// const at::Tensor& other, -// ::std::optional rounding_mode, -// at::Tensor& output) { -// TensorIterator iter = -// 
meta_func_div_Tensor_mode(self, other, output, rounding_mode); -// impl_func_div_Tensor_mode(iter, rounding_mode); -// return output; -// } - -// Tensor XPUNativeFunctions::rsub( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha) { -// return XPUNativeFunctions::sub(other, self, alpha); -// } -// Tensor& XPUNativeFunctions::div_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::div_out(self, wrapper, out); -// } - -// Tensor XPUNativeFunctions::rsub( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha) { -// return XPUNativeFunctions::sub(other, self, alpha); -// } - -// Tensor& XPUNativeFunctions::rsub_out( -// const Tensor& self, -// const Tensor& other, -// const Scalar& alpha, -// Tensor& out) { -// return XPUNativeFunctions::sub_out(other, self, alpha, out); -// } - -// Tensor XPUNativeFunctions::rsub( -// const Tensor& self, -// const Scalar& other, -// const Scalar& alpha) { -// return XPUNativeFunctions::sub( -// native::wrapped_scalar_tensor(other), self, alpha); -// } - -// Tensor& XPUNativeFunctions::rsub_out( -// const Tensor& self, -// const Scalar& other, -// const Scalar& alpha, -// Tensor& out) { -// return XPUNativeFunctions::sub_out( -// native::wrapped_scalar_tensor(other), self, alpha, out); -// } - -// Tensor XPUNativeFunctions::remainder(const Tensor& self, const Tensor& -// other) -// { -// Tensor out; -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::remainder_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::remainder_(Tensor& self, const Tensor& other) { -// auto iter = TensorIterator::borrowing_binary_op(self, self, other); -// native::xpu::remainder_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::remainder_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::remainder_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::remainder(const Tensor& self, const Scalar& -// other) -// { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::remainder(self, wrapper); -// } - -// Tensor& XPUNativeFunctions::remainder_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::remainder_(self, wrapper); -// } - -// Tensor& XPUNativeFunctions::remainder_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::remainder_out(self, wrapper, out); -// } - -// Tensor XPUNativeFunctions::remainder(const Scalar& self, const Tensor& -// other) -// { -// auto wrapper = native::wrapped_scalar_tensor(self); -// return XPUNativeFunctions::remainder(wrapper, other); -// } - -// Tensor& XPUNativeFunctions::remainder_out( -// const Scalar& self, -// const Tensor& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(self); -// return XPUNativeFunctions::remainder_out(wrapper, other, out); -// } - -// Tensor XPUNativeFunctions::fmod(const Tensor& self, const Tensor& other) { -// Tensor out; -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::fmod_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::fmod_(Tensor& self, const Tensor& other) { -// 
auto iter = TensorIterator::borrowing_binary_op(self, self, other); -// native::xpu::fmod_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::fmod_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::fmod_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::fmod(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::fmod(self, wrapper); -// } - -// Tensor& XPUNativeFunctions::fmod_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::fmod_(self, wrapper); -// } - -// Tensor& XPUNativeFunctions::fmod_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// return XPUNativeFunctions::fmod_out(self, wrapper, out); -// } - -// Tensor XPUNativeFunctions::tanh_backward( -// const Tensor& grad_output, -// const Tensor& output) { -// Tensor out; -// auto iter = TensorIterator::borrowing_binary_op(out, grad_output, -// output); native::xpu::tanh_backward_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::tanh_backward_out( -// const Tensor& grad_output, -// const Tensor& output, -// Tensor& grad_input) { -// auto iter = -// TensorIterator::borrowing_binary_op(grad_input, grad_output, output); -// native::xpu::tanh_backward_kernel(iter); -// return grad_input; -// } - -// Tensor& XPUNativeFunctions::bitwise_and_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::bitwise_and_kernel(iter); -// return out; -// } - -// Tensor& XPUNativeFunctions::bitwise_or_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::bitwise_or_kernel(iter); -// return out; -// } - -// Tensor& XPUNativeFunctions::bitwise_xor_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::bitwise_xor_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::gcd(const Tensor& self, const Tensor& other) { -// Tensor out; -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::gcd_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::gcd_(Tensor& self, const Tensor& other) { -// auto iter = TensorIterator::borrowing_binary_op(self, self, other); -// native::xpu::gcd_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::gcd_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// auto iter = TensorIterator::borrowing_binary_op(out, self, other); -// native::xpu::gcd_kernel(iter); -// return out; -// } - -// static inline TensorIterator meta_func_maximum( -// const Tensor& self, -// const Tensor& other, -// Tensor& output) { -// TORCH_CHECK( -// !self.is_complex() && !other.is_complex(), -// "maximum not implemented for complex tensors."); -// auto iter = TensorIterator::borrowing_binary_op(output, self, other); -// return iter; -// } - -// Tensor XPUNativeFunctions::maximum(const Tensor& self, const Tensor& other) { -// Tensor output; -// auto iter = meta_func_maximum(self, other, output); -// native::xpu::maximum_kernel(iter); -// 
return iter.output(); -// } - -// Tensor& XPUNativeFunctions::maximum_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& output) { -// auto iter = meta_func_maximum(self, other, output); -// native::xpu::maximum_kernel(iter); -// return output; -// } - -// static inline TensorIterator meta_func_minimum( -// const Tensor& self, -// const Tensor& other, -// Tensor& output) { -// TORCH_CHECK( -// !self.is_complex() && !other.is_complex(), -// "minimum not implemented for complex tensors."); -// auto iter = TensorIterator::borrowing_binary_op(output, self, other); -// return iter; -// } - -// Tensor XPUNativeFunctions::minimum(const Tensor& self, const Tensor& other) { -// Tensor output; -// auto iter = meta_func_minimum(self, other, output); -// native::xpu::minimum_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::minimum_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& output) { -// auto iter = meta_func_minimum(self, other, output); -// native::xpu::minimum_kernel(iter); -// return output; -// } - -// Tensor& XPUNativeFunctions::sigmoid_backward_out( -// const Tensor& grad_output, -// const Tensor& output, -// Tensor& grad_input) { -// TensorIterator iter; -// iter.build_borrowing_binary_op(grad_input, grad_output, output); -// native::xpu::sigmoid_backward_kernel(iter); -// return grad_input; -// } - -// Tensor XPUNativeFunctions::sigmoid_backward( -// const Tensor& grad_output, -// const Tensor& output) { -// Tensor grad_input; -// TensorIterator iter; -// iter.build_borrowing_binary_op(grad_input, grad_output, output); -// native::xpu::sigmoid_backward_kernel(iter); -// return iter.output(); -// } - } // namespace at diff --git a/src/ATen/native/xpu/CompareOps.cpp b/src/ATen/native/xpu/CompareOps.cpp index 9144160b4..b5376b58d 100644 --- a/src/ATen/native/xpu/CompareOps.cpp +++ b/src/ATen/native/xpu/CompareOps.cpp @@ -17,313 +17,4 @@ REGISTER_XPU_DISPATCH(gt_stub, xpu::gt_kernel); REGISTER_XPU_DISPATCH(ge_stub, xpu::ge_kernel); // REGISTER_XPU_DISPATCH(isnan_stub, xpu::isnan_out) } // namespace native - -// Tensor XPUNativeFunctions::eq(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::eq_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::eq_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(self, self, other); -// native::xpu::eq_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::eq_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::eq_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::eq(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::eq_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::eq_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(self, self, -// wrapper); native::xpu::eq_kernel(iter); return self; -// } - -// Tensor& XPUNativeFunctions::eq_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = 
native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::eq_kernel(iter); return out; -// } - -// Tensor XPUNativeFunctions::ne(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::ne_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::ne_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(self, self, other); -// native::xpu::ne_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::ne_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::ne_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::ne(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::ne_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::ne_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(self, self, -// wrapper); native::xpu::ne_kernel(iter); return self; -// } - -// Tensor& XPUNativeFunctions::ne_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::ne_kernel(iter); return out; -// } - -// Tensor XPUNativeFunctions::lt(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::lt_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::lt_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(self, self, other); -// native::xpu::lt_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::lt_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::lt_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::lt(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::lt_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::lt_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(self, self, -// wrapper); native::xpu::lt_kernel(iter); return self; -// } - -// Tensor& XPUNativeFunctions::lt_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::lt_kernel(iter); return out; -// } - -// Tensor XPUNativeFunctions::le(const Tensor& self, const 
Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::le_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::le_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(self, self, other); -// native::xpu::le_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::le_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::le_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::le(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::le_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::le_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(self, self, -// wrapper); native::xpu::le_kernel(iter); return self; -// } - -// Tensor& XPUNativeFunctions::le_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::le_kernel(iter); return out; -// } - -// Tensor XPUNativeFunctions::gt(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::gt_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::gt_(Tensor& self, const Tensor& other) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(self, self, other); -// native::xpu::gt_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::gt_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::gt_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::gt(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::gt_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::gt_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(self, self, -// wrapper); native::xpu::gt_kernel(iter); return self; -// } - -// Tensor& XPUNativeFunctions::gt_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::gt_kernel(iter); return out; -// } - -// Tensor XPUNativeFunctions::ge(const Tensor& self, const Tensor& other) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::ge_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::ge_(Tensor& self, const Tensor& other) { -// 
TensorIterator iter; -// iter.build_borrowing_comparison_op(self, self, other); -// native::xpu::ge_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::ge_out( -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_comparison_op(out, self, other); -// native::xpu::ge_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::ge(const Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::ge_kernel(iter); return iter.output(); -// } - -// Tensor& XPUNativeFunctions::ge_(Tensor& self, const Scalar& other) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(self, self, -// wrapper); native::xpu::ge_kernel(iter); return self; -// } - -// Tensor& XPUNativeFunctions::ge_out( -// const Tensor& self, -// const Scalar& other, -// Tensor& out) { -// auto wrapper = native::wrapped_scalar_tensor(other); -// TensorIterator iter; -// iter.build_borrowing_except_last_argument_comparison_op(out, self, -// wrapper); native::xpu::ge_kernel(iter); return out; -// } - -// Tensor XPUNativeFunctions::isnan(const Tensor& self) { -// return XPUNativeFunctions::ne(self, self); -// } - -// Tensor& XPUNativeFunctions::isnan_out(const Tensor& self, Tensor& out) { -// return XPUNativeFunctions::ne_out(self, self, out); -// } - } // namespace at diff --git a/src/ATen/native/xpu/Copy.cpp b/src/ATen/native/xpu/Copy.cpp index 27521a18c..60a1a42aa 100644 --- a/src/ATen/native/xpu/Copy.cpp +++ b/src/ATen/native/xpu/Copy.cpp @@ -329,28 +329,4 @@ Tensor& _copy_xpu(Tensor& self, const Tensor& src, bool non_blocking) { namespace native { REGISTER_XPU_DISPATCH(copy_stub, &native::xpu::_copy_xpu); } -// Tensor& XPUNativeFunctions::copy_( -// Tensor& self, -// const Tensor& src, -// bool non_blocking) { -// return native::xpu::_copy_xpu(self, src, non_blocking); -// } - -// Tensor XPUNativeFunctions::_to_copy( -// const Tensor& self, -// c10::optional dtype, -// c10::optional layout, -// c10::optional device, -// c10::optional pin_memory, -// bool non_blocking, -// c10::optional optional_memory_format) { -// return at::native::_to_copy( -// self, -// dtype, -// layout, -// device, -// pin_memory, -// non_blocking, -// optional_memory_format); -// } } // namespace at diff --git a/src/ATen/native/xpu/DilatedMaxPool2d.cpp b/src/ATen/native/xpu/DilatedMaxPool2d.cpp index 6d8a53990..c97b14da3 100644 --- a/src/ATen/native/xpu/DilatedMaxPool2d.cpp +++ b/src/ATen/native/xpu/DilatedMaxPool2d.cpp @@ -31,228 +31,4 @@ TORCH_IMPL_FUNC(max_pool2d_with_indices_backward_out_xpu) ceil_mode); } } // namespace native - -// Tensor& max_pool2d_with_indices_backward_meta( -// const Tensor& gradOutput, -// const Tensor& input, -// IntArrayRef kernel_size, -// IntArrayRef stride, -// IntArrayRef padding, -// IntArrayRef dilation, -// bool ceil_mode, -// const Tensor& indices, -// Tensor& gradInput) { -// // #20866, #22032: Guarantee this for the official C++ API? -// TORCH_CHECK( -// kernel_size.size() == 1 || kernel_size.size() == 2, -// "max_pool2d: kernel_size must either be a single int, or a tuple of two -// ints") -// const int kH = safe_downcast(kernel_size[0]); -// const int kW = kernel_size.size() == 1 -// ? 
kH -// : safe_downcast(kernel_size[1]); - -// // NB: stride default is not expressible as an integer constant, so we -// accept -// // empty stride for this case -// TORCH_CHECK( -// stride.empty() || stride.size() == 1 || stride.size() == 2, -// "max_pool2d: stride must either be omitted, a single int, or a tuple of -// two ints") -// const int dH = stride.empty() ? kH : safe_downcast(stride[0]); const int dW = stride.empty() ? kW -// : stride.size() == 1 ? dH -// : safe_downcast(stride[1]); - -// TORCH_CHECK( -// padding.size() == 1 || padding.size() == 2, -// "max_pool2d: padding must either be a single int, or a tuple of two -// ints"); -// const int padH = safe_downcast(padding[0]); -// const int padW = -// padding.size() == 1 ? padH : safe_downcast(padding[1]); - -// TORCH_CHECK( -// dilation.size() == 1 || dilation.size() == 2, -// "max_pool2d: dilation must be either a single int, or a tuple of two -// ints"); -// const int dilationH = safe_downcast(dilation[0]); -// const int dilationW = dilation.size() == 1 -// ? dilationH -// : safe_downcast(dilation[1]); - -// TORCH_CHECK( -// input.dtype() == gradOutput.dtype(), -// "expected dtype ", -// input.dtype(), -// " for `gradOutput` but got dtype ", -// gradOutput.dtype()); - -// const auto memory_format = input.suggest_memory_format(); -// if (memory_format == at::MemoryFormat::ChannelsLast) { -// TORCH_CHECK( -// input.ndimension() == 4, -// "non-empty 4D (batch mode) tensor expected for input with -// channels_last layout"); -// } else if (memory_format == at::MemoryFormat::Contiguous) { -// TORCH_CHECK( -// (input.ndimension() == 3 || input.ndimension() == 4), -// "non-empty 3D or 4D (batch mode) tensor expected for input"); -// } else { -// TORCH_CHECK( -// false, -// "Unsupport memory format. Supports only ChannelsLast, Contiguous"); -// } - -// /* sizes */ -// const int64_t nInputPlane = input.size(-3); -// const int64_t inputHeight = input.size(-2); -// const int64_t inputWidth = input.size(-1); - -// /* XXX preserve the existing shape check behavior */ -// const int64_t outputHeight_for_shape_check = pooling_output_shape( -// inputHeight, kH, padH, dH, dilationH, ceil_mode); -// const int64_t outputWidth_for_shape_check = pooling_output_shape( -// inputWidth, kW, padW, dW, dilationW, ceil_mode); - -// max_pool2d_backward_shape_check( -// input, -// gradOutput, -// indices, -// kH, -// kW, -// dH, -// dW, -// padH, -// padW, -// dilationH, -// dilationW, -// nInputPlane, -// inputHeight, -// inputWidth, -// outputHeight_for_shape_check, -// outputWidth_for_shape_check, -// memory_format); - -// auto options = input.options().memory_format(memory_format); -// if (gradInput.defined()) { -// at::xpu::resize_out(gradInput, input.sizes(), {}, options); -// } else { -// gradInput = at::xpu::create_out(input.sizes(), {}, options); -// } - -// return gradInput; -// } - -// Tensor& max_pool2d_with_indices_backward_out_impl( -// const Tensor& grad_output_, -// const Tensor& self_, -// IntArrayRef kernel_size, -// IntArrayRef stride, -// IntArrayRef padding, -// IntArrayRef dilation, -// bool ceil_mode, -// const Tensor& indices_, -// Tensor& grad_input) { -// /* PyTorch support two cases of MaxPool2d: -// 1. 3D: Input (C, H, W), Output (C, H0, W0), Kernel (kH, kW) -// This case does not support channel last format. For a 3-dim tensor, -// the PyTorch suggest_memory_format can only be Contiguous or -// ChannelsLast1D (nwc), the ChannelsLast1D (nwc) does not match the -// sementics of Input (C, H, W) case. 
Then the suggest_memory_format can -// only be Contiguous. -// 2. 4D: Input (N, C, H, W), Output (N, C, H0, W0), Kernel (kH, kW) -// This case supports Contiguous and ChannelsLast2D memory_format. */ -// Tensor self, grad_output, indices; -// if (self_.ndimension() == 3) { -// self = self_.contiguous(); -// grad_output = grad_output_.contiguous(); -// indices = indices_.contiguous(); -// grad_input.zero_(); -// } else { -// auto smf = self_.suggest_memory_format(); -// self = self_.contiguous(smf); -// grad_output = grad_output_.contiguous(smf); -// indices = indices_.contiguous(smf); -// grad_input.zero_(); -// } -// at::native::xpu::max_pool2d_with_indices_backward_out_kernel( -// grad_input, -// grad_output, -// self, -// indices, -// kernel_size, -// stride, -// padding, -// dilation, -// ceil_mode); -// return grad_input; -// } - -// Tensor XPUNativeFunctions::max_pool2d_with_indices_backward( -// const Tensor& grad_output, -// const Tensor& self, -// IntArrayRef kernel_size, -// IntArrayRef stride, -// IntArrayRef padding, -// IntArrayRef dilation, -// bool ceil_mode, -// const Tensor& indices) { -// Tensor grad_input; -// grad_input = max_pool2d_with_indices_backward_meta( -// grad_output, -// self, -// kernel_size, -// stride, -// padding, -// dilation, -// ceil_mode, -// indices, -// grad_input); -// grad_input = max_pool2d_with_indices_backward_out_impl( -// grad_output, -// self, -// kernel_size, -// stride, -// padding, -// dilation, -// ceil_mode, -// indices, -// grad_input); -// return grad_input; -// } - -// Tensor& XPUNativeFunctions::max_pool2d_with_indices_backward_out( -// const Tensor& grad_output, -// const Tensor& self, -// IntArrayRef kernel_size, -// IntArrayRef stride, -// IntArrayRef padding, -// IntArrayRef dilation, -// bool ceil_mode, -// const Tensor& indices, -// Tensor& grad_input) { -// grad_input = max_pool2d_with_indices_backward_meta( -// grad_output, -// self, -// kernel_size, -// stride, -// padding, -// dilation, -// ceil_mode, -// indices, -// grad_input); -// grad_input = max_pool2d_with_indices_backward_out_impl( -// grad_output, -// self, -// kernel_size, -// stride, -// padding, -// dilation, -// ceil_mode, -// indices, -// grad_input); -// return grad_input; -// } - } // namespace at diff --git a/src/ATen/native/xpu/Distributions.cpp b/src/ATen/native/xpu/Distributions.cpp index 8df8d7c30..b4cbfa948 100644 --- a/src/ATen/native/xpu/Distributions.cpp +++ b/src/ATen/native/xpu/Distributions.cpp @@ -24,111 +24,4 @@ REGISTER_XPU_DISPATCH( random_full_64_bits_range_stub, native::xpu::random_full_64_bits_range_kernel); } // namespace native - -// template -// struct NormalStub { -// void operator()( -// Tensor& self, -// double mean, -// double std, -// c10::optional gen) { -// native::xpu::normal_kernel(self, mean, std, gen); -// } -// }; - -// Tensor& XPUNativeFunctions::normal_( -// Tensor& self, -// double mean, -// double std, -// ::std::optional generator) { -// return native::templates::normal_impl_( -// self, mean, std, std::move(generator)); -// } - -// template -// struct UniformStub { -// void operator()( -// TensorIteratorBase& iter, -// double from, -// double to, -// c10::optional gen) { -// native::xpu::uniform_kernel(iter, from, to, gen); -// } -// }; - -// Tensor& XPUNativeFunctions::uniform_( -// Tensor& self, -// double from, -// double to, -// ::std::optional generator) { -// return native::templates::uniform_impl_( -// self, from, to, std::move(generator)); -// } - -// template -// struct BernoulliStub { -// void operator()( -// 
Tensor& self, -// const Tensor& p_, -// c10::optional gen) { -// native::xpu::bernoulli_tensor_kernel(self, p_, gen); -// } -// void operator()(Tensor& self, double p, c10::optional gen) { -// native::xpu::bernoulli_scalar_kernel(self, p, gen); -// } -// }; - -// Tensor& XPUNativeFunctions::bernoulli_( -// Tensor& self, -// const Tensor& p_, -// ::std::optional generator) { -// return native::templates::bernoulli_impl_( -// self, p_, std::move(generator)); -// } - -// Tensor& XPUNativeFunctions::bernoulli_( -// Tensor& self, -// double p, -// ::std::optional generator) { -// return native::templates::bernoulli_impl_( -// self, p, std::move(generator)); -// } - -// template -// struct RandomStub { -// void operator()(TensorIteratorBase& iter, c10::optional gen) { -// native::xpu::random_kernel(iter, gen); -// } -// }; - -// Tensor& XPUNativeFunctions::random_( -// Tensor& self, -// ::std::optional generator) { -// return native::templates::random_impl( -// self, std::move(generator)); -// } - -// template -// struct RandomFromToStub { -// void operator()( -// TensorIteratorBase& iter, -// uint64_t range, -// int64_t from, -// c10::optional gen) { -// native::xpu::random_from_to_kernel(iter, range, from, gen); -// } -// void operator()(TensorIteratorBase& iter, c10::optional gen) { -// native::xpu::random_full_64_bits_range_kernel(iter, gen); -// } -// }; - -// Tensor& random_( -// Tensor& self, -// int64_t from, -// c10::optional to_opt, -// ::std::optional generator) { -// return native::templates::random_from_to_impl( -// self, from, to_opt, std::move(generator)); -// } - } // namespace at diff --git a/src/ATen/native/xpu/Fill.cpp b/src/ATen/native/xpu/Fill.cpp index 47b192a5d..88b3f765d 100644 --- a/src/ATen/native/xpu/Fill.cpp +++ b/src/ATen/native/xpu/Fill.cpp @@ -6,46 +6,5 @@ #include namespace at::native { - -// Tensor& fill_out(Tensor& self, const Scalar& value) { -// auto iter = TensorIteratorConfig() -// .set_check_mem_overlap( -// false) // Fill is idempotent, so overlap is okay -// .check_all_same_dtype(false) -// .add_output(self) -// .resize_outputs(false) -// .build(); -// native::xpu::fill_kernel(iter, value); -// return self; -// } - -// Tensor& XPUNativeFunctions::fill_(Tensor& self, const Scalar& value) { -// return fill_out(self, value); -// } - -// Tensor& XPUNativeFunctions::fill_(Tensor& self, const Tensor& value) { -// TORCH_CHECK( -// value.dim() == 0, -// "fill_ only supports 0-dimension value tensor but got tensor with ", -// value.dim(), -// " dimensions."); -// if (self.device() != value.device()) { -// return fill_out(self, value.item()); -// } -// // Check if value is a view of self and if it is we clone -// // it to avoid overwriting self prematurely -// if (self.is_alias_of(value)) { -// self.copy_(value.clone()); -// } else { -// self.copy_(value); -// } -// return self; -// } - -// Tensor& XPUNativeFunctions::zero_(Tensor& self) { -// return self.fill_(0); -// } - REGISTER_XPU_DISPATCH(fill_stub, &native::xpu::fill_kernel); - } // namespace at::native diff --git a/src/ATen/native/xpu/Indexing.cpp b/src/ATen/native/xpu/Indexing.cpp index d9b613e46..70c3b64f0 100644 --- a/src/ATen/native/xpu/Indexing.cpp +++ b/src/ATen/native/xpu/Indexing.cpp @@ -43,13 +43,5 @@ Tensor index_select_xpu_(const Tensor& self, int64_t dim, const Tensor& index) { return at::native::index_select_out_xpu(self, dim, index, result); } -// Tensor XPUNativeFunctions::index_select( -// const Tensor& self, -// int64_t dim, -// const Tensor& index) { -// auto out = at::empty({0}, 
self.options()); -// return index_select_out(self, dim, index, out); -// } - } // namespace native } // namespace at diff --git a/src/ATen/native/xpu/LossNLL.cpp b/src/ATen/native/xpu/LossNLL.cpp index ddb2491ca..56e316b5d 100644 --- a/src/ATen/native/xpu/LossNLL.cpp +++ b/src/ATen/native/xpu/LossNLL.cpp @@ -56,249 +56,4 @@ TORCH_IMPL_FUNC(nll_loss_backward_out_xpu) } } // namespace native - -// void nll_loss_forward_meta( -// const Tensor& self, -// const Tensor& target, -// const OptionalTensorRef weight_opt, -// int64_t reduction, -// int64_t ignore_index, -// Tensor& output, -// Tensor& total_weight) { -// const Tensor& weight = weight_opt.getTensorRef(); - -// TORCH_CHECK( -// self.dim() > 0 && self.dim() <= 2, "input tensor should be 1D or 2D"); -// TORCH_CHECK( -// target.dim() <= 1, -// "0D or 1D target tensor expected, multi-target not supported"); - -// auto no_batch_dim = self.dim() == 1 && target.dim() == 0; -// TORCH_CHECK( -// no_batch_dim || (self.size(0) == target.size(0)), -// "size mismatch (got input: ", -// self.sizes(), -// ", target: ", -// target.sizes(), -// ")") - -// const auto n_classes = self.size(-1); - -// TORCH_CHECK( -// !weight.defined() || (weight.dim() == 1 && weight.numel() == -// n_classes), "weight tensor should be defined either for all ", -// n_classes, -// " classes or no classes" -// " but got weight tensor of shape: ", -// weight.sizes()); - -// const auto n_dims = self.dim(); -// const auto batch_size = self.size(0); - -// if (reduction == Reduction::None && n_dims == 2) { -// if (output.defined()) { -// at::xpu::resize_out(output, {batch_size}, {}, self.options()); -// } else { -// output = at::xpu::create_out({batch_size}, {}, self.options()); -// } -// } else { -// // produce scalar output when reducing or input is 1d -// if (output.defined()) { -// at::xpu::resize_out(output, {}, {}, self.options()); -// } else { -// output = at::xpu::create_out({}, {}, self.options()); -// } -// } -// if (total_weight.defined()) { -// at::xpu::resize_out(total_weight, {}, {}, self.options()); -// } else { -// total_weight = at::xpu::create_out({}, {}, self.options()); -// } -// } - -// std::tuple XPUNativeFunctions::nll_loss_forward_out( -// const Tensor& self, -// const Tensor& target, -// const c10::optional& weight, -// int64_t reduction, -// int64_t ignore_index, -// Tensor& output, -// Tensor& total_weight) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, output, "xpu::nll_loss_forward_out", "output"); -// c10::impl::check_and_update_common_device( -// common_device, total_weight, "xpu::nll_loss_forward_out", -// "total_weight"); -// c10::impl::check_and_update_common_device( -// common_device, self, "xpu::nll_loss_forward_out", "self"); -// c10::impl::check_and_update_common_device( -// common_device, target, "xpu::nll_loss_forward_out", "target"); -// c10::impl::check_and_update_common_device( -// common_device, weight, "xpu::nll_loss_forward_out", "weight"); -// nll_loss_forward_meta( -// self, -// target, -// ((weight.has_value() && (*weight).defined()) -// ? at::OptionalTensorRef(*weight) -// : at::OptionalTensorRef()), -// reduction, -// ignore_index, -// output, -// total_weight); -// return native::xpu::nll_loss_forward_kernel( -// self, -// target, -// ((weight.has_value() && (*weight).defined()) -// ? 
at::OptionalTensorRef(*weight) -// : at::OptionalTensorRef()), -// reduction, -// ignore_index, -// output, -// total_weight); -// } - -// std::tuple XPUNativeFunctions::nll_loss_forward( -// const Tensor& self, -// const Tensor& target, -// const c10::optional& weight, -// int64_t reduction, -// int64_t ignore_index) { -// Tensor output; -// Tensor total_weight; -// return nll_loss_forward_out( -// self, target, weight, reduction, ignore_index, output, total_weight); -// } - -// void nll_loss_backward_meta( -// const Tensor& grad_output, -// const Tensor& self, -// const Tensor& target, -// OptionalTensorRef weight_opt, -// int64_t reduction, -// int64_t ignore_index, -// const Tensor& total_weight, -// Tensor& grad_input) { -// TORCH_CHECK( -// self.dim() > 0 && self.dim() <= 2, "input tensor should be 1D or 2D"); -// TORCH_CHECK( -// target.dim() <= 1, -// "0D or 1D target tensor expected, multi-target not supported"); - -// auto no_batch_dim = self.dim() == 1 && target.dim() == 0; -// TORCH_CHECK( -// no_batch_dim || (self.size(0) == target.size(0)), -// "size mismatch (got input: ", -// self.sizes(), -// ", target: ", -// target.sizes(), -// ")") -// TORCH_CHECK( -// total_weight.numel() == 1, -// "expected total_weight to be a single element tensor, got: ", -// total_weight.sizes(), -// " (", -// total_weight.numel(), -// " elements)"); - -// const auto& weight = weight_opt.getTensorRef(); - -// TORCH_CHECK( -// !weight.defined() || weight.numel() == self.size(-1), -// "weight tensor should be defined either for all or no classes"); - -// const auto n_dims = self.dim(); - -// if (reduction == Reduction::None && n_dims == 2) { -// const auto batch_size = self.size(0); -// check_dim_size(grad_output, 1, 0, batch_size); -// } else { -// TORCH_CHECK( -// grad_output.dim() <= 1 && grad_output.numel() == 1, -// "Expected a single element grad_output tensor, but got: ", -// grad_output.sizes()); -// } -// if (grad_input.defined()) { -// at::xpu::resize_out( -// grad_input, -// self.sizes(), -// {}, -// self.options().memory_format(LEGACY_CONTIGUOUS_MEMORY_FORMAT)); -// } else { -// grad_input = at::xpu::create_out( -// self.sizes(), -// {}, -// self.options().memory_format(LEGACY_CONTIGUOUS_MEMORY_FORMAT)); -// } -// } - -// Tensor& XPUNativeFunctions::nll_loss_backward_out( -// const Tensor& grad_output, -// const Tensor& self, -// const Tensor& target, -// const c10::optional& weight, -// int64_t reduction, -// int64_t ignore_index, -// const Tensor& total_weight, -// Tensor& grad_input) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, grad_input, "xpu::nll_loss_backward_out", "grad_input"); -// c10::impl::check_and_update_common_device( -// common_device, grad_output, "xpu::nll_loss_backward_out", -// "grad_output"); -// c10::impl::check_and_update_common_device( -// common_device, self, "xpu::nll_loss_backward_out", "self"); -// c10::impl::check_and_update_common_device( -// common_device, target, "xpu::nll_loss_backward_out", "target"); -// c10::impl::check_and_update_common_device( -// common_device, weight, "xpu::nll_loss_backward_out", "weight"); -// c10::impl::check_and_update_common_device( -// common_device, -// total_weight, -// "xpu::nll_loss_backward_out", -// "total_weight"); -// nll_loss_backward_meta( -// grad_output, -// self, -// target, -// ((weight.has_value() && (*weight).defined()) -// ? 
at::OptionalTensorRef(*weight) -// : at::OptionalTensorRef()), -// reduction, -// ignore_index, -// total_weight, -// grad_input); -// return native::xpu::nll_loss_backward_kernel( -// grad_output, -// self, -// target, -// ((weight.has_value() && (*weight).defined()) -// ? at::OptionalTensorRef(*weight) -// : at::OptionalTensorRef()), -// reduction, -// ignore_index, -// total_weight, -// grad_input); -// } - -// Tensor XPUNativeFunctions::nll_loss_backward( -// const Tensor& grad_output, -// const Tensor& self, -// const Tensor& target, -// const c10::optional& weight, -// int64_t reduction, -// int64_t ignore_index, -// const Tensor& total_weight) { -// Tensor grad_input; -// return nll_loss_backward_out( -// grad_output, -// self, -// target, -// weight, -// reduction, -// ignore_index, -// total_weight, -// grad_input); -// } } // namespace at \ No newline at end of file diff --git a/src/ATen/native/xpu/Pow.cpp b/src/ATen/native/xpu/Pow.cpp index b50974761..16d0f1ba5 100644 --- a/src/ATen/native/xpu/Pow.cpp +++ b/src/ATen/native/xpu/Pow.cpp @@ -7,109 +7,6 @@ #include namespace at { - -// TensorIterator pow_tensor_tensor_meta( -// const Tensor& base, -// const Tensor& exp, -// Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_binary_op(out, base, exp); -// return iter; -// } - -// TensorIterator pow_tensor_scalar_meta( -// const Tensor& base, -// const Scalar& exp, -// Tensor& out) { -// // Numpy compatibility check: -// TORCH_CHECK( -// !(isIntegralType(base.scalar_type(), true) && exp.isIntegral(true) && -// exp.toLong() < 0), -// "Integers to negative integer powers are not allowed."); - -// auto common_dtype = at::result_type(base, exp); -// TensorIterator iter; -// iter.build_output_borrowing_argument_owning_unary_op( -// out, base.to(common_dtype)); -// return iter; -// } - -// Tensor XPUNativeFunctions::pow(const Tensor& self, const Tensor& exponent) { -// Tensor out; -// auto iter = pow_tensor_tensor_meta(self, exponent, out); -// native::xpu::pow_tensor_tensor_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::pow_(Tensor& self, const Tensor& exponent) { -// auto iter = pow_tensor_tensor_meta(self, exponent, self); -// native::xpu::pow_tensor_tensor_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::pow_out( -// const Tensor& base, -// const Tensor& exp, -// Tensor& out) { -// auto iter = pow_tensor_tensor_meta(base, exp, out); -// native::xpu::pow_tensor_tensor_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::pow(const Tensor& self, const Scalar& exponent) { -// Tensor out; -// auto iter = pow_tensor_scalar_meta(self, exponent, out); -// native::xpu::pow_tensor_scalar_kernel(iter, exponent); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::pow_(Tensor& self, const Scalar& exponent) { -// auto iter = pow_tensor_scalar_meta(self, exponent, self); -// if (exponent.equal(0.0) || exponent.equal(false)) { -// self.fill_(1); -// } else if (exponent.equal(1.0) || exponent.equal(true)) { -// } else { -// native::xpu::pow_tensor_scalar_kernel(iter, exponent); -// } -// return self; -// } - -// Tensor& XPUNativeFunctions::pow_out( -// const Tensor& self, -// const Scalar& exponent, -// Tensor& out) { -// auto iter = pow_tensor_scalar_meta(self, exponent, out); -// if (exponent.equal(0.0) || exponent.equal(false)) { -// out.fill_(1); -// } else if (exponent.equal(1.0) || exponent.equal(true)) { -// out.copy_(self); -// } else { -// native::xpu::pow_tensor_scalar_kernel(iter, exponent); 
-// } -// return out; -// } - -// Tensor XPUNativeFunctions::pow(const Scalar& self, const Tensor& exponent) { -// Tensor out; -// auto iter = TensorIterator::binary_op( -// out, native::wrapped_scalar_tensor(self), exponent); -// native::xpu::pow_tensor_tensor_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::pow_out( -// const Scalar& self, -// const Tensor& exponent, -// Tensor& out) { -// if (self.equal(1.0)) { -// out.fill_(1); -// } else { -// return XPUNativeFunctions::pow_out( -// native::wrapped_scalar_tensor(self), exponent, out); -// } -// return out; -// } - namespace native { REGISTER_XPU_DISPATCH( pow_tensor_tensor_stub, diff --git a/src/ATen/native/xpu/ReduceAllOps.cpp b/src/ATen/native/xpu/ReduceAllOps.cpp index d0bdec218..8bd2b19e7 100644 --- a/src/ATen/native/xpu/ReduceAllOps.cpp +++ b/src/ATen/native/xpu/ReduceAllOps.cpp @@ -18,30 +18,6 @@ void min_all_kernel_impl(Tensor& result, const Tensor& input) { native::xpu::min_all_launch_kernel(iter); } -// Tensor XPUNativeFunctions::min(const Tensor& self) { -// TORCH_CHECK( -// self.numel() > 0, -// "min(): Expected reduction dim to be specified for input.numel() == 0. -// Specify the reduction dim with the 'dim' argument."); -// Tensor result = at::empty({}, self.options()); -// min_all_kernel_impl(result, self.contiguous()); -// return result; -// } - -// Tensor& XPUNativeFunctions::min_out(const Tensor& self, Tensor& out) { -// // First check if the devices match (CPU vs GPU) -// TORCH_CHECK(self.device() == out.device()); - -// TORCH_CHECK(canCast( -// typeMetaToScalarType(self.dtype()), -// typeMetaToScalarType(out.dtype()))); - -// at::native::resize_output(out, {}); - -// min_all_kernel_impl(out, self.contiguous()); -// return out; -// } - void max_all_kernel_impl(Tensor& result, const Tensor& input) { auto dtype = input.scalar_type(); auto iter = native::make_reduction( @@ -49,30 +25,6 @@ void max_all_kernel_impl(Tensor& result, const Tensor& input) { native::xpu::max_all_launch_kernel(iter); } -// Tensor XPUNativeFunctions::max(const Tensor& self) { -// TORCH_CHECK( -// self.numel() > 0, -// "max(): Expected reduction dim to be specified for input.numel() == 0. -// Specify the reduction dim with the 'dim' argument."); -// Tensor result = at::empty({}, self.options()); -// max_all_kernel_impl(result, self.contiguous()); -// return result; -// } - -// Tensor& XPUNativeFunctions::max_out(const Tensor& self, Tensor& out) { -// // First check if the devices match (CPU vs GPU) -// TORCH_CHECK(self.device() == out.device()); - -// TORCH_CHECK(canCast( -// typeMetaToScalarType(self.dtype()), -// typeMetaToScalarType(out.dtype()))); - -// at::native::resize_output(out, {}); - -// max_all_kernel_impl(out, self.contiguous()); -// return out; -// } - namespace native { REGISTER_XPU_DISPATCH(min_all_stub, min_all_kernel_impl); REGISTER_XPU_DISPATCH(max_all_stub, max_all_kernel_impl); diff --git a/src/ATen/native/xpu/ReduceOps.cpp b/src/ATen/native/xpu/ReduceOps.cpp index fdb5cb42f..7c116ce58 100644 --- a/src/ATen/native/xpu/ReduceOps.cpp +++ b/src/ATen/native/xpu/ReduceOps.cpp @@ -9,7 +9,7 @@ #include #include #include -// #include + #include #include #include @@ -45,47 +45,6 @@ void impl_func_cum_ops( } } -// Tensor& XPUNativeFunctions::cumsum_out( -// const Tensor& self, -// int64_t dim, -// c10::optional dtype, -// Tensor& result) { -// // Checking whether 'dim' is valid. 
-// maybe_wrap_dim(dim, self.dim()); - -// ScalarType out_dtype; - -// if (!result.defined()) { -// auto is_integral = -// at::isIntegralType(self.scalar_type(), /*includeBool=*/true); -// out_dtype = -// dtype.value_or(is_integral ? ScalarType::Long : self.scalar_type()); -// result = at::empty_strided( -// self.sizes(), self.strides(), self.options().dtype(out_dtype)); -// } else { -// at::native::resize_output(result, self.sizes()); -// result.as_strided_(self.sizes(), self.strides()); -// } - -// impl_func_cum_ops(self, dim, result, at::native::xpu::cumsum_kernel); -// return result; -// } - -// Tensor XPUNativeFunctions::cumsum( -// const Tensor& self, -// int64_t dim, -// c10::optional dtype) { -// Tensor result; -// return cumsum_out(self, dim, dtype, result); -// } - -// Tensor& XPUNativeFunctions::cumsum_( -// Tensor& self, -// int64_t dim, -// c10::optional dtype) { -// return cumsum_out(self, dim, dtype, self); -// } - static ScalarType infer_dtype_from_optional( const Tensor& self, const optional& opt_dtype, @@ -121,114 +80,6 @@ inline bool should_use_acc_buffer(at::TensorIterator& iter) { return true; } -// Tensor& XPUNativeFunctions::sum_out( -// const Tensor& self, -// OptionalIntArrayRef opt_dim, -// bool keepdim, -// c10::optional opt_dtype, -// Tensor& result) { -// auto out_dtype = infer_dtype_from_optional(self, opt_dtype, result); -// result = resize_reduction(result, self, opt_dim, keepdim, out_dtype); -// auto iter = meta::make_reduction_from_out_ty( -// self, result, opt_dim, keepdim, result.scalar_type()); -// if (iter.numel() == 0) { -// result.zero_(); -// } else { -// // Here is a limitation of TensorIterator reductions for permuted input -// with -// // lower precision on CPU. Consider the case: TensorIterator coalesces -// such -// // input and output to >= 2 dims tensors, and the output stride is [0, 0, -// x, -// // x, ...] with x >= 0 (two reduced dimensions and non-reduced dims). -// Since -// // the reduction loop only operates on two dimensions at a time, the -// // intermediate sums is forced to do accumulation in the second reduced -// dim -// // with lower precision. See -// https://github.com/pytorch/pytorch/issues/83149 if -// (should_use_acc_buffer(iter)) { -// auto tmp_output = -// at::empty(result.sizes(), result.options().dtype(kFloat)); -// at::sum_outf( -// self.to(ScalarType::Float), -// opt_dim, -// keepdim, -// /*dtype=*/c10::nullopt, -// tmp_output); -// result.copy_(tmp_output); -// } else { -// native::xpu::sum_kernel(iter); -// } -// } -// return result; -// } - -// Tensor XPUNativeFunctions::sum( -// const Tensor& self, -// OptionalIntArrayRef dim, -// bool keepdim, -// c10::optional opt_dtype) { -// Tensor out; -// return XPUNativeFunctions::sum_out(self, dim, keepdim, opt_dtype, out); -// } - -// Tensor& XPUNativeFunctions::sum_out( -// const Tensor& self, -// c10::optional dtype, -// Tensor& out) { -// return XPUNativeFunctions::sum_out(self, IntArrayRef{}, false, dtype, out); -// } - -// Tensor& mean_meta( -// const Tensor& self, -// OptionalIntArrayRef opt_dim, -// bool keepdim, -// optional opt_dtype, -// Tensor& out) { -// auto in_dtype = at::native::get_dtype_from_self(self, opt_dtype, true); -// if (!at::isFloatingType(in_dtype) && !at::isComplexType(in_dtype)) { -// std::string what = "Input"; -// std::string dtype = toString(self.scalar_type()); - -// if (opt_dtype.has_value()) { -// what = "Optional"; -// dtype = toString(opt_dtype.value()); -// } - -// TORCH_CHECK( -// false, -// "mean(): could not infer output dtype. 
", -// what, -// " dtype must be either a floating point or complex dtype. ", -// "Got: ", -// dtype); -// } - -// auto out_dtype = infer_dtype_from_optional(self, opt_dtype, out); -// out = resize_reduction(out, self, opt_dim, keepdim, out_dtype); -// return out; -// } - -// Tensor& XPUNativeFunctions::mean_out( -// const Tensor& self, -// OptionalIntArrayRef opt_dim, -// bool keepdim, -// c10::optional opt_dtype, -// Tensor& result) { -// result = mean_meta(self, opt_dim, keepdim, opt_dtype, result); -// ScalarType dtype = result.scalar_type(); -// // device is not CPU -// auto iter = at::meta::make_reduction_from_out_ty( -// self, result, opt_dim, keepdim, dtype); -// if (iter.numel() == 0) { -// result.fill_(std::numeric_limits::quiet_NaN()); -// } else { -// native::xpu::mean_kernel(iter); -// } -// return result; -// } - inline TensorIterator get_allany_iter( const Tensor& self, const Tensor& result, @@ -293,30 +144,6 @@ Tensor& allany_meta( return result; } -// Tensor XPUNativeFunctions::any(const Tensor& self, int64_t dim, bool keepdim) -// { -// Tensor result; -// result = allany_meta(result, "any", self, dim, keepdim); -// allany_impl<0>(self, result, dim, keepdim, native::xpu::or_kernel); -// return result; -// } - -// Tensor& XPUNativeFunctions::any_out( -// const Tensor& self, -// int64_t dim, -// bool keepdim, -// Tensor& out) { -// out = allany_meta(out, "any", self, dim, keepdim); -// allany_impl<0>(self, out, dim, keepdim, native::xpu::or_kernel); -// return out; -// } - -// Tensor& XPUNativeFunctions::any_out(const Tensor& self, Tensor& out) { -// out = allany_meta(out, "any", self, {}, false); -// allany_impl<0>(self, out, {}, false, native::xpu::or_kernel); -// return out; -// } - template void argmax_argmin_impl( const Tensor& self, @@ -371,29 +198,4 @@ static IntArrayRef optional_to_arrayref(const c10::optional& opt) { return opt.has_value() ? 
opt.value() : IntArrayRef{}; } -// Tensor& argmax_meta( -// const Tensor& self, -// c10::optional dim, -// bool keepdim, -// Tensor& out) { -// check_argmax_argmin("argmax()", self, dim); -// return resize_reduction(out, self, optional_to_arrayref(dim), keepdim, -// kLong); -// } - -// Tensor& XPUNativeFunctions::argmax_out( -// const Tensor& self, -// c10::optional dim, -// bool keepdim, -// Tensor& out) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, out, "XPUNativeFunctions::argmax_out", "out"); -// c10::impl::check_and_update_common_device( -// common_device, self, "XPUNativeFunctions::argmax_out", "self"); -// out = argmax_meta(self, dim, keepdim, out); -// argmax_argmin_impl(self, dim, keepdim, out, native::xpu::argmax_kernel); -// return out; -// } - } // namespace at diff --git a/src/ATen/native/xpu/Resize.cpp b/src/ATen/native/xpu/Resize.cpp index dbb121c3b..066410c02 100644 --- a/src/ATen/native/xpu/Resize.cpp +++ b/src/ATen/native/xpu/Resize.cpp @@ -177,12 +177,6 @@ const at::Tensor& resize_xpu_( return native::xpu::resize_xpu_(self, size, memory_format); } -// Tensor& XPUNativeFunctions::set_(Tensor& self, Storage source) { -// int64_t new_size = -// static_cast(source.nbytes() / self.dtype().itemsize()); -// return self.set_(source, 0, new_size, {}); -// } - Tensor& set_storage_xpu_( Tensor& self, Storage source, @@ -206,11 +200,5 @@ Tensor& set_xpu_(Tensor& result) { TORCH_INTERNAL_ASSERT(dtype == result.dtype()); return result; } - } // namespace native - -// Tensor& set_(Tensor& self, const at::Tensor& source) { -// return at::native::set_tensor_(self, source); -// } - } // namespace at diff --git a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp index b153b50e3..b4f818dcb 100644 --- a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp +++ b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp @@ -9,10 +9,9 @@ #include #include #include -#include -// #include #include #include +#include #include #include @@ -41,75 +40,8 @@ TORCH_IMPL_FUNC(index_add_xpu_out) // index_func_meta_impl(result, self, dim, index, source, "index_add"); native::xpu::index_add_kernel(self, dim, index, source, alpha, result); } - -// Tensor index_select_xpu_(const Tensor& self, int64_t dim, const Tensor& -// index) { -// Tensor result = at::empty({0}, self.options()); -// return at::native::index_select_out_xpu(self, dim, index, result); -// } - } // namespace native -// Tensor& XPUNativeFunctions::masked_fill_( -// Tensor& self, -// const Tensor& mask, -// const Scalar& value) { -// TORCH_CHECK( -// self.device() == mask.device(), -// "expected self and mask to be on the same device, but got mask on ", -// mask.device(), -// " and self on ", -// self.device()); -// TORCH_CHECK( -// mask.scalar_type() == kBool, -// "masked_fill only supports boolean masks, but got dtype ", -// mask.scalar_type()); -// auto maybe_outnames = -// namedinference::broadcast_to_outnames(self, mask, "masked_fill_"); -// if (at::has_internal_overlap(self) == MemOverlap::Yes) { -// TORCH_WARN( -// "Use of masked_fill_ on expanded tensors is deprecated. " -// "Please clone() the tensor before performing this operation. " -// "This also applies to advanced indexing e.g. 
tensor[mask] = scalar"); -// } -// at::assert_no_partial_overlap(self, mask); - -// c10::MaybeOwned b_mask = expand_inplace(self, mask, -// "masked_fill_"); - -// auto iter = TensorIteratorConfig() -// .set_check_mem_overlap(false) -// .check_all_same_dtype(false) -// .resize_outputs(false) -// .add_output(self) -// .add_const_input(self) -// .add_const_input(*b_mask) -// .build(); - -// native::xpu::masked_fill_kernel(iter, value); -// namedinference::propagate_names_if_nonempty(self, maybe_outnames); -// return self; -// } - -// Tensor& XPUNativeFunctions::masked_fill_( -// Tensor& self, -// const Tensor& mask, -// const Tensor& value) { -// TORCH_CHECK( -// value.dim() == 0, -// "masked_fill_ only supports a 0-dimensional value tensor, but got -// tensor " "with ", value.dim(), " dimension(s)."); -// // We hit this function if either of the input tensor lives on XPU. -// // It is ok, if `value` is `CPU` tensor but we should not allow `self` or -// // `mask` to be CPU tensor. Check for `self` and `mask` being on same -// device -// // exists in `masked_fill_` (Scalar version). -// TORCH_CHECK( -// self.device().is_xpu(), -// "masked_fill_: Expected inputs to be on same device") -// return XPUNativeFunctions::masked_fill_(self, mask, value.item()); -// } - void index_func_meta_impl( Tensor& result, const Tensor& self, @@ -196,24 +128,4 @@ void index_func_meta_impl( } } -// Tensor& XPUNativeFunctions::index_add_out( -// const Tensor& self, -// int64_t dim, -// const Tensor& index, -// const Tensor& source, -// const Scalar& alpha, -// Tensor& out) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, self, "xpu::index_add_out", "self"); -// c10::impl::check_and_update_common_device( -// common_device, index, "xpu::index_add_out", "index"); -// c10::impl::check_and_update_common_device( -// common_device, source, "xpu::index_add_out", "source"); -// dim = maybe_wrap_dim(dim, self.dim()); -// index_func_meta_impl(out, self, dim, index, source, "index_add"); -// native::xpu::index_add_kernel(self, dim, index, source, alpha, out); -// return out; -// } - } // namespace at diff --git a/src/ATen/native/xpu/TensorCompare.cpp b/src/ATen/native/xpu/TensorCompare.cpp index 3ada05c7b..713013b7c 100644 --- a/src/ATen/native/xpu/TensorCompare.cpp +++ b/src/ATen/native/xpu/TensorCompare.cpp @@ -5,7 +5,6 @@ #include #include #include -// #include #include #include #include @@ -82,26 +81,6 @@ Tensor& where_self_out( return out; } -// Tensor& XPUNativeFunctions::where_out( -// const Tensor& condition, -// const Tensor& self, -// const Tensor& other, -// Tensor& out) { -// return where_self_out(condition, self, other, out); -// } - -// Tensor XPUNativeFunctions::where( -// const Tensor& condition, -// const Tensor& self, -// const Tensor& other) { -// auto device = out_device(condition, self, other); -// auto result_type = at::native::result_type(self, other); -// Tensor ret = at::empty({0}, -// self.options().dtype(result_type).device(device)); -// where_self_out(condition, self, other, ret); -// return ret; -// } - TensorIterator clamp_meta( const Tensor& self, const OptionalScalarRef min, @@ -144,124 +123,6 @@ TensorIterator clamp_meta( return iter; } -// Tensor& XPUNativeFunctions::clamp_out( -// const Tensor& self, -// const c10::optional& min, -// const c10::optional& max, -// Tensor& result) { -// auto iter = clamp_meta( -// self, -// (min.has_value() ? 
at::OptionalScalarRef(&(min.value())) -// : at::OptionalScalarRef()), -// (max.has_value() ? at::OptionalScalarRef(&(max.value())) -// : at::OptionalScalarRef()), -// result); -// using at::native::detail::ClampLimits; -// if (min && max) { -// if ((*min).toDouble() != (*min).toDouble() || -// (*max).toDouble() != (*max).toDouble()) { -// at::fill_( -// const_cast(result), -// std::numeric_limits::quiet_NaN()); -// } else { -// native::xpu::clamp_scalar_kernel(iter, *min, *max); -// } -// } else if (max) { -// native::xpu::clamp_max_scalar_kernel(iter, *max); -// } else if (min) { -// native::xpu::clamp_min_scalar_kernel(iter, *min); -// } -// return result; -// } - -// TensorIterator clamp_min_meta( -// const Tensor& self, -// const Scalar& min, -// Tensor& result) { -// TensorIterator iter; -// ScalarType result_type = self.scalar_type(); -// TORCH_CHECK( -// !isComplexType(result_type), "clamp is not supported for complex -// types"); -// TORCH_CHECK(!min.isComplex(), "clamp is not supported for complex types"); -// // Floating is the highest supported -// if (!isFloatingType(result_type)) { -// auto result_type = at::native::result_type(self, min); -// TORCH_CHECK( -// (result_type == self.scalar_type() || !(result.defined()) || -// !(result.is_same(self))), -// "result type ", -// result_type, -// " can't be cast to the desired output type ", -// self.dtype()); -// iter.build_unary_op(result, self.to(result_type)); -// } else { -// iter.build_borrowing_unary_op(result, self); -// } -// return iter; -// } - -// Tensor& XPUNativeFunctions::clamp_min_out( -// const Tensor& self, -// const Scalar& min, -// Tensor& result) { -// auto iter = clamp_min_meta(self, min, result); -// if (min.toDouble() != min.toDouble()) { -// at::fill_(const_cast(result), min); -// } else { -// native::xpu::clamp_min_scalar_kernel(iter, min); -// } -// return result; -// } - -// TensorIterator clamp_max_meta( -// const Tensor& self, -// const Scalar& max, -// Tensor& result) { -// TensorIterator iter; -// // we could wrap max into tensor and send to tensor overload, -// // but relu is implemented via clamp_min, so for perf an uniformity reasons -// // do a faster but correct thing -// ScalarType result_type = self.scalar_type(); -// TORCH_CHECK( -// !isComplexType(result_type), "clamp is not supported for complex -// types"); -// TORCH_CHECK(!max.isComplex(), "clamp is not supported for complex types"); -// // Floating is the highest supported -// if (!isFloatingType(result_type)) { -// auto result_type = at::native::result_type(self, max); -// TORCH_CHECK( -// (result_type == self.scalar_type()) || -// (!(result.defined()) || !(result.is_same(self))), -// "result type ", -// result_type, -// " can't be cast to the desired output type ", -// self.dtype()); -// iter.build_unary_op(result, self.to(result_type)); -// } else { -// iter.build_borrowing_unary_op(result, self); -// } -// return iter; -// } - -// Tensor& XPUNativeFunctions::clamp_max_out( -// const Tensor& self, -// const Scalar& max, -// Tensor& result) { -// auto iter = clamp_max_meta(self, max, result); -// if (max.toDouble() != max.toDouble()) { -// // TODO this is not great, building TI again is expensive, but I can't -// use -// // fill_stub because fill is not structured -// // this is a corner case anyway -// at::fill_(const_cast(result), -// native::wrapped_scalar_tensor(max)); -// } else { -// native::xpu::clamp_max_scalar_kernel(iter, max); -// } -// return result; -// } - void min_kernel_impl( const Tensor& result, const Tensor& indice, @@ 
-307,36 +168,4 @@ static void check_unsupported_complex(const char* name, const Tensor& self) { TORCH_CHECK(!self.is_complex(), name, ": does not support complex input"); } -// ::std::tuple XPUNativeFunctions::min_out( -// const Tensor& self, -// int64_t dim, -// bool keepdim, -// Tensor& values, -// Tensor& indices) { -// dim = maybe_wrap_dim(dim, self.dim()); -// at::native::zero_numel_check_dims(self, dim, "min()"); -// check_unsupported_complex("min()", self); -// at::xpu::resize_reduction_with_indices( -// values, indices, self, dim, keepdim, self.scalar_type()); - -// minmax_out_impl(self, dim, keepdim, values, indices, min_kernel_impl); -// return {values, indices}; -// } - -// ::std::tuple XPUNativeFunctions::max_out( -// const Tensor& self, -// int64_t dim, -// bool keepdim, -// Tensor& values, -// Tensor& indices) { -// dim = maybe_wrap_dim(dim, self.dim()); -// at::native::zero_numel_check_dims(self, dim, "max()"); -// check_unsupported_complex("max()", self); -// at::xpu::resize_reduction_with_indices( -// values, indices, self, dim, keepdim, self.scalar_type()); - -// minmax_out_impl(self, dim, keepdim, values, indices, max_kernel_impl); -// return {values, indices}; -// } - } // namespace at diff --git a/src/ATen/native/xpu/TensorFactories.cpp b/src/ATen/native/xpu/TensorFactories.cpp index a45078452..ba36405c9 100644 --- a/src/ATen/native/xpu/TensorFactories.cpp +++ b/src/ATen/native/xpu/TensorFactories.cpp @@ -59,11 +59,4 @@ Tensor empty_strided_xpu( } } // namespace native - -// Tensor XPUNativeFunctions::clone( -// const Tensor& self, -// c10::optional memory_format) { -// return at::native::clone(self, memory_format); -// } - } // namespace at diff --git a/src/ATen/native/xpu/TensorShape.cpp b/src/ATen/native/xpu/TensorShape.cpp index 5138d79f3..e75c6e8f4 100644 --- a/src/ATen/native/xpu/TensorShape.cpp +++ b/src/ATen/native/xpu/TensorShape.cpp @@ -60,266 +60,4 @@ TORCH_IMPL_FUNC(cat_out_xpu) } } // namespace native - -// Tensor XPUNativeFunctions::view(const Tensor& self, IntArrayRef size) { -// return at::native::view(self, size); -// } - -// Tensor XPUNativeFunctions::view_as_real(const at::Tensor& self) { -// return at::native::view_as_real(self); -// } - -// Tensor XPUNativeFunctions::view_as_complex(const Tensor& self) { -// return at::native::view_as_complex(self); -// } - -// Tensor XPUNativeFunctions::as_strided( -// const Tensor& self, -// IntArrayRef size, -// IntArrayRef stride, -// c10::optional storage_offset = c10::nullopt) { -// if (self.is_quantized()) { -// return at::native::as_strided_qtensorimpl( -// self, size, stride, storage_offset); -// } -// return at::native::as_strided_tensorimpl(self, size, stride, -// storage_offset); -// } - -// Tensor XPUNativeFunctions::_reshape_alias( -// const Tensor& self, -// IntArrayRef size, -// IntArrayRef stride) { -// return at::native::_reshape_alias(self, size, stride); -// } - -// Tensor XPUNativeFunctions::unfold( -// const Tensor& self, -// int64_t dimension, -// int64_t size, -// int64_t step) { -// return at::native::unfold(self, dimension, size, step); -// } - -// inline c10::MemoryFormat cat_compute_output_memory_format( -// const MaterializedITensorListRef& inputs) { -// c10::optional format = c10::nullopt; -// for (const Tensor& t : inputs) { -// auto f = t.suggest_memory_format(); -// if (f == c10::MemoryFormat::Contiguous) { -// return f; -// } -// if (format.has_value() && format.value() != f) { -// return c10::MemoryFormat::Contiguous; -// } -// format = f; -// } -// return format.value(); -// } - 
-// inline void cat_check_no_zero_dim(const MaterializedITensorListRef& tensors) -// { -// size_t i = 0; -// for (const Tensor& t : tensors) { -// TORCH_CHECK( -// t.dim() > 0, -// "zero-dimensional tensor (at position ", -// i, -// ") cannot be concatenated"); -// i++; -// } -// } - -// void cat_meta( -// const ITensorListRef& tensors, -// int64_t& dim, -// Tensor& result, -// size_t& valid, -// bool& all_contiguous, -// bool& all_same_dtype, -// bool& all_same_sizes_and_stride, -// c10::MemoryFormat& memory_format) { -// // previously, size [0] tensors were the only possible empty tensors; thus, -// it -// // wasn't possible to cat empty tensors unless all the other tensors were -// // 1-dimensional, so we allowed these tensors to be "skipped". We maintain -// // this behavior for backwards compatibility, but only for this specific -// size -// // (i.e. other empty sizes are not skipped). -// auto materialized = tensors.materialize(); - -// cat_check_no_zero_dim(materialized); -// dim = at::legacy_cat_wrap_dim(dim, materialized); - -// // Checking names before the actual dimensions. -// auto maybe_outnames = namedinference::compute_cat_outnames(materialized); - -// TORCH_CHECK( -// !materialized.empty(), -// "torch.cat(): expected a non-empty list of Tensors"); - -// // Look for the first valid tensor. -// valid = materialized.size(); -// for (const auto i : c10::irange(materialized.size())) { -// if (!at::native::cat_should_skip_tensor(materialized[i].get())) { -// valid = i; -// break; -// } -// } - -// all_contiguous = true; -// all_same_dtype = true; -// all_same_sizes_and_stride = true; -// memory_format = cat_compute_output_memory_format(materialized); - -// // Compute what the output dtype should be: -// auto is_out_defined = result.defined(); -// auto out_dtype = at::native::result_type(tensors); - -// // If the output tensor is defined, we need to take it into account -// // when computing the actual output dtype and the flags. -// if (is_out_defined) { -// // Check for type promotion, if the output tensor is defined. -// TORCH_CHECK( -// canCast(out_dtype, result.scalar_type()), -// "torch.cat(): input types can't be cast to the desired output type ", -// result.scalar_type()); -// out_dtype = result.scalar_type(); -// all_contiguous = result.is_contiguous(memory_format); -// } - -// // Fallback 'set_output' parameters. -// // (in case we don't find a valid tensor) -// DimVector sizes{0}; -// TensorOptions options = -// materialized[0].get().options().dtype(out_dtype).memory_format( -// memory_format); - -// // If we found a valid tensor, check whether the input tensors -// // are compatible, i.e. we can execute `cat` on them. -// bool found_valid_tensor = valid < materialized.size(); -// if (found_valid_tensor) { -// TORCH_CHECK( -// dim <= materialized[valid].get().dim(), -// "torch.cat(): dimension ", -// dim, -// "out of range"); - -// // Compute the output tensor size. -// // It should have the same shape as any other valid tensor, -// // except in the dimension 'dim'. 
-// size_t size_at_dim = 0; -// for (const auto i : c10::irange(materialized.size())) { -// const Tensor& t = materialized[i]; -// all_same_dtype = all_same_dtype && out_dtype == t.scalar_type(); -// if (!at::native::cat_should_skip_tensor(t)) { -// at::native::check_cat_shape_except_dim(materialized[valid], t, dim, -// i); size_at_dim += t.size(dim); all_contiguous = all_contiguous && -// t.is_contiguous(memory_format); all_same_sizes_and_stride = -// all_same_sizes_and_stride && -// t.sizes() == materialized[valid].get().sizes() && -// t.strides() == materialized[valid].get().strides(); -// } else { -// all_contiguous = false; -// } -// } - -// // Actually set the output. -// sizes = materialized[valid].get().sizes().vec(); -// sizes[dim] = size_at_dim; -// options = -// materialized[valid].get().options().dtype(out_dtype).memory_format( -// memory_format); -// } - -// if (is_out_defined) { -// at::xpu::resize_out(result, sizes, {}, options); -// } else { -// result = at::xpu::create_out(sizes, {}, options); -// } - -// if (!maybe_outnames.empty()) { -// namedinference::propagate_names(result, maybe_outnames); -// } -// // Checks for overlaps between the inputs and the output tensor. -// if (is_out_defined && found_valid_tensor) { -// at::assert_no_internal_overlap(result); -// for (const Tensor& t : materialized) { -// at::assert_no_overlap(result, t); -// } -// } -// } - -// Tensor& XPUNativeFunctions::cat_out( -// const ITensorListRef& tensors, -// int64_t dim, -// Tensor& result) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, result, "xpu::cat_out", "out"); -// c10::impl::check_and_update_common_device( -// common_device, tensors, "xpu::cat_out", "tensors"); - -// size_t valid; -// bool all_contiguous; -// bool all_same_dtype; -// bool all_same_sizes_and_stride; -// c10::MemoryFormat memory_format; -// cat_meta( -// tensors, -// dim, -// result, -// valid, -// all_contiguous, -// all_same_dtype, -// all_same_sizes_and_stride, -// memory_format); - -// at::native::xpu::cat_out_kernel( -// tensors, -// dim, -// valid, -// all_contiguous, -// all_same_dtype, -// all_same_sizes_and_stride, -// memory_format, -// result); - -// return result; -// } - -// Tensor XPUNativeFunctions::cat(const ITensorListRef& tensors, int64_t dim) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, tensors, "xpu::cat", "tensors"); - -// Tensor result; -// size_t valid; -// bool all_contiguous; -// bool all_same_dtype; -// bool all_same_sizes_and_stride; -// c10::MemoryFormat memory_format; -// cat_meta( -// tensors, -// dim, -// result, -// valid, -// all_contiguous, -// all_same_dtype, -// all_same_sizes_and_stride, -// memory_format); - -// at::native::xpu::cat_out_kernel( -// tensors, -// dim, -// valid, -// all_contiguous, -// all_same_dtype, -// all_same_sizes_and_stride, -// memory_format, -// result); - -// return result; -// } - } // namespace at diff --git a/src/ATen/native/xpu/TriangluarOps.cpp b/src/ATen/native/xpu/TriangluarOps.cpp index 416290e8a..8294fac43 100644 --- a/src/ATen/native/xpu/TriangluarOps.cpp +++ b/src/ATen/native/xpu/TriangluarOps.cpp @@ -21,65 +21,4 @@ TORCH_IMPL_FUNC(triu_xpu)(const Tensor& self, int64_t k, const Tensor& result) { xpu::triu_kernel(result, self, k); } } -// void tril_meta(const Tensor& self, int64_t k) { -// TORCH_CHECK( -// self.dim() >= 2, "tril: input tensor must have at least 2 dimensions"); -// } - -// Tensor& 
XPUNativeFunctions::tril_out( -// const Tensor& self, -// int64_t diagonal, -// Tensor& out) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, out, "xpu::tril_out", "out"); -// c10::impl::check_and_update_common_device( -// common_device, self, "xpu::tril_out", "self"); -// tril_meta(self, diagonal); -// xpu::resize_out(out, self.sizes(), {}, self.options()); -// return native::xpu::tril_kernel(out, self, diagonal); -// } - -// Tensor XPUNativeFunctions::tril(const Tensor& self, int64_t diagonal) { -// tril_meta(self, diagonal); -// Tensor out = xpu::create_out(self.sizes(), {}, self.options()); -// return tril_out(self, diagonal, out); -// } - -// Tensor& XPUNativeFunctions::tril_(Tensor& self, int64_t diagonal) { -// tril_meta(self, diagonal); -// xpu::check_inplace(self, self.sizes(), self.options()); -// return tril_out(self, diagonal, self); -// } - -// void triu_meta(const Tensor& self, int64_t k) { -// TORCH_CHECK( -// self.dim() >= 2, "triu: input tensor must have at least 2 dimensions"); -// } - -// Tensor& XPUNativeFunctions::triu_out( -// const Tensor& self, -// int64_t diagonal, -// Tensor& out) { -// std::optional common_device = std::nullopt; -// c10::impl::check_and_update_common_device( -// common_device, out, "xpu::triu_out", "out"); -// c10::impl::check_and_update_common_device( -// common_device, self, "xpu::triu_out", "self"); -// triu_meta(self, diagonal); -// xpu::resize_out(out, self.sizes(), {}, self.options()); -// return native::xpu::triu_kernel(out, self, diagonal); -// } - -// Tensor XPUNativeFunctions::triu(const Tensor& self, int64_t diagonal) { -// triu_meta(self, diagonal); -// Tensor out = xpu::create_out(self.sizes(), {}, self.options()); -// return triu_out(self, diagonal, out); -// } - -// Tensor& XPUNativeFunctions::triu_(Tensor& self, int64_t diagonal) { -// triu_meta(self, diagonal); -// xpu::check_inplace(self, self.sizes(), self.options()); -// return triu_out(self, diagonal, self); -// } } // namespace at::native diff --git a/src/ATen/native/xpu/UnaryOps.cpp b/src/ATen/native/xpu/UnaryOps.cpp index add2058df..f1d631138 100644 --- a/src/ATen/native/xpu/UnaryOps.cpp +++ b/src/ATen/native/xpu/UnaryOps.cpp @@ -122,303 +122,4 @@ template static inline Tensor& unary_op_impl_(Tensor& self, OutImpl& out_impl) { return out_impl(self, self); } - -// Tensor XPUNativeFunctions::abs(const Tensor& self) { -// return unary_op_impl_with_complex_to_float(self, at::abs_out); -// } - -// Tensor& XPUNativeFunctions::abs_(Tensor& self) { -// TORCH_CHECK( -// !self.is_complex(), "In-place abs is not supported for complex -// tensors."); -// return unary_op_impl_(self, at::abs_out); -// } - -// Tensor& XPUNativeFunctions::abs_out(const Tensor& self, Tensor& out) { -// return unary_op_impl_with_complex_to_float_out( -// out, -// self, -// native::xpu::abs_kernel, -// /*promotes_integer_to_float=*/false); -// } - -// Tensor XPUNativeFunctions::sin(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::sin_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::sin_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::sin_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::sin_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::sin_kernel(iter); -// 
return out; -// } - -// Tensor XPUNativeFunctions::cos(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::cos_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::cos_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::cos_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::cos_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::cos_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::log(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::log_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::log_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::log_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::log_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::log_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::sqrt(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::sqrt_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::sqrt_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::sqrt_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::sqrt_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::sqrt_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::rsqrt(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::rsqrt_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::rsqrt_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::rsqrt_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::rsqrt_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::rsqrt_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::tanh(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::tanh_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::tanh_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::tanh_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::tanh_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::tanh_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::neg(const Tensor& self) { -// TORCH_CHECK( -// self.scalar_type() != kBool, -// "Negation, the `-` operator, on a bool tensor is not supported. 
" -// "If you are trying to invert a mask, use the `~` or `logical_not()` -// operator instead."); -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_op(out, self); -// native::xpu::neg_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::neg_(Tensor& self) { -// TORCH_CHECK( -// self.scalar_type() != kBool, -// "Negation, the `-` operator, on a bool tensor is not supported. " -// "If you are trying to invert a mask, use the `~` or `logical_not()` -// operator instead."); -// TensorIterator iter; -// iter.build_borrowing_unary_op(self, self); -// native::xpu::neg_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::neg_out(const Tensor& self, Tensor& out) { -// TORCH_CHECK( -// self.scalar_type() != kBool, -// "Negation, the `-` operator, on a bool tensor is not supported. " -// "If you are trying to invert a mask, use the `~` or `logical_not()` -// operator instead."); -// TensorIterator iter; -// iter.build_borrowing_unary_op(out, self); -// native::xpu::neg_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::reciprocal(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::reciprocal_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::reciprocal_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::reciprocal_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::reciprocal_out(const Tensor& self, Tensor& out) -// { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::reciprocal_kernel(iter); -// return out; -// } - -// Tensor& XPUNativeFunctions::bitwise_not_out(const Tensor& self, Tensor& -// out) -// { -// TensorIterator iter; -// iter.build_borrowing_unary_op(out, self); -// native::xpu::bitwise_not_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::exp(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::exp_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::exp_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::exp_kernel(iter); -// return out; -// } - -// Tensor& XPUNativeFunctions::exp_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::exp_kernel(iter); -// return self; -// } - -// Tensor XPUNativeFunctions::sigmoid(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::sigmoid_kernel(iter); -// return iter.output(); -// } - -// Tensor& XPUNativeFunctions::sigmoid_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(self, self); -// native::xpu::sigmoid_kernel(iter); -// return self; -// } - -// Tensor& XPUNativeFunctions::sigmoid_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_float_op(out, self); -// native::xpu::sigmoid_kernel(iter); -// return out; -// } - -// Tensor XPUNativeFunctions::sgn(const Tensor& self) { -// Tensor out; -// TensorIterator iter; -// iter.build_borrowing_unary_op(out, self); -// if (self.is_complex()) { -// native::xpu::sgn_kernel(iter); -// } else { -// native::xpu::sign_kernel(iter); -// } -// return iter.output(); -// } - -// Tensor& 
XPUNativeFunctions::sgn_(Tensor& self) { -// TensorIterator iter; -// iter.build_borrowing_unary_op(self, self); -// if (self.is_complex()) { -// native::xpu::sgn_kernel(iter); -// } else { -// native::xpu::sign_kernel(iter); -// } -// return self; -// } - -// Tensor& XPUNativeFunctions::sgn_out(const Tensor& self, Tensor& out) { -// TensorIterator iter; -// iter.build_borrowing_unary_op(out, self); -// if (self.is_complex()) { -// native::xpu::sgn_kernel(iter); -// } else { -// native::xpu::sign_kernel(iter); -// } -// return out; -// } - } // namespace at diff --git a/src/ATen/native/xpu/XPUScalar.cpp b/src/ATen/native/xpu/XPUScalar.cpp index 3becba25d..bb8e93e60 100644 --- a/src/ATen/native/xpu/XPUScalar.cpp +++ b/src/ATen/native/xpu/XPUScalar.cpp @@ -2,7 +2,6 @@ #include #include #include -// #include #include #ifndef AT_PER_OPERATOR_HEADERS
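
The hunks above all apply the same refactor: hand-written per-operator wrappers in the XPUNativeFunctions namespace (pow, min/max reductions, cumsum/sum/mean/any/argmax, masked_fill_, where/clamp, cat, tril/triu, the unary ops, ...) are deleted as dead commented-out code, and the XPU kernels remain reachable only through the shared dispatch stubs, e.g. REGISTER_XPU_DISPATCH(pow_tensor_tensor_stub, ...), REGISTER_XPU_DISPATCH(min_all_stub, min_all_kernel_impl), or through TORCH_IMPL_FUNC structured implementations. Below is a minimal, self-contained sketch of that stub/registration idea; SimpleStub, RegisterXpuImpl, clamp_min_stub and clamp_min_kernel_xpu are invented names for illustration only and are not the real ATen DispatchStub / REGISTER_XPU_DISPATCH machinery.

```cpp
// Standalone sketch of the dispatch-stub pattern the diff consolidates onto.
// All names here are illustrative, not ATen's actual implementation.
#include <iostream>
#include <stdexcept>
#include <utility>

// A "stub" is a named slot that a backend fills in with its kernel at
// static-initialization time; generic operator code only ever calls the stub.
template <typename FnPtr>
struct SimpleStub {
  FnPtr fn = nullptr;
  template <typename... Args>
  void call(Args&&... args) const {
    if (!fn) throw std::runtime_error("no kernel registered for this stub");
    fn(std::forward<Args>(args)...);
  }
};

using clamp_min_fn = void (*)(float*, long, float);
SimpleStub<clamp_min_fn> clamp_min_stub;  // shared, backend-agnostic entry point

// Backend kernel (a stand-in for something like native::xpu::clamp_min_scalar_kernel).
void clamp_min_kernel_xpu(float* data, long n, float min_val) {
  for (long i = 0; i < n; ++i) data[i] = data[i] < min_val ? min_val : data[i];
}

// Registration object, mirroring REGISTER_XPU_DISPATCH(stub, kernel): a static
// instance wires the kernel into the stub before main() runs.
struct RegisterXpuImpl {
  RegisterXpuImpl(SimpleStub<clamp_min_fn>& stub, clamp_min_fn fn) { stub.fn = fn; }
};
static RegisterXpuImpl register_clamp_min(clamp_min_stub, clamp_min_kernel_xpu);

int main() {
  float data[4] = {-2.0f, -0.5f, 0.5f, 2.0f};
  // The front end needs no hand-written per-backend wrapper (the role the
  // deleted XPUNativeFunctions::* functions used to play); it calls the stub.
  clamp_min_stub.call(data, 4L, 0.0f);
  for (float v : data) std::cout << v << ' ';
  std::cout << '\n';  // prints: 0 0 0.5 2
}
```

The design choice the sketch illustrates is why the diff can delete so much code: once the kernel is registered against the shared stub, argument checking, output resizing, and dtype promotion live in the common operator front end, so the backend only has to keep the kernel and one registration line per stub.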