Skip to content

Commit

Permalink
Merge branch 'main' into zhiwei/codegen
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhiweiYan-96 committed Sep 11, 2024
2 parents 283c6f7 + 1206590 commit 33cebd2
Show file tree
Hide file tree
Showing 273 changed files with 1,575 additions and 920 deletions.
15 changes: 9 additions & 6 deletions cmake/BuildFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,17 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -cl-fp32-correctly-rounded-divide-sqrt")
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "-options '${SYCL_OFFLINE_COMPILER_CG_OPTIONS}'")

# Default AOT (ahead-of-time) device targets per platform. NOTE(review):
# reconstructed from a marker-less diff render — the original span interleaved
# deleted and added hunk lines (duplicate env check, AOT options logged before
# being set); this is the coherent merged result.
if(WIN32)
  set(AOT_TARGETS "ats-m150,lnl-m,mtl-u,mtl-h")
else()
  set(AOT_TARGETS "pvc,xe-lpg,ats-m150")
endif()
# Users may override the default target list through the TORCH_XPU_ARCH_LIST
# environment variable (read once, at configure time).
if((DEFINED ENV{TORCH_XPU_ARCH_LIST}) AND NOT ("$ENV{TORCH_XPU_ARCH_LIST}" STREQUAL ""))
  set(AOT_TARGETS "$ENV{TORCH_XPU_ARCH_LIST}")
endif()

set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")

set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS} ${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
else()
Expand Down
6 changes: 6 additions & 0 deletions cmake/Modules/FindSYCL/run_sycl.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ endforeach()
# Choose host flags in FindSYCL.cmake
@SYCL_host_flags@

# Adding permissive flag for MSVC build to overcome ambiguous symbol error.
if(WIN32)
string(APPEND SYCL_host_compiler_flags "/permissive- ")
endif()


list(REMOVE_DUPLICATES CMAKE_HOST_FLAGS)
foreach(flag ${CMAKE_HOST_FLAGS})
# Extra quotes are added around each flag to help SYCL parse out flags with spaces.
Expand Down
5 changes: 4 additions & 1 deletion src/ATen/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# ATen XPU sources

# Source groups are globbed separately: plain XPU C++, native (incl. sparse)
# C++, and SYCL kernel sources, so the parent build can compile each group
# with its own flow. (Stale duplicate glob of xpu_cpp removed — it also
# wrongly folded native/sparse sources into the non-native group.)
file(GLOB xpu_cpp "xpu/*.cpp")
file(GLOB xpu_native_cpp "native/xpu/*.cpp" "native/sparse/*.cpp")
file(GLOB xpu_sycl "native/xpu/sycl/*.cpp")

list(APPEND ATen_XPU_CPP_SRCS ${xpu_cpp})
list(APPEND ATen_XPU_NATIVE_CPP_SRCS ${xpu_native_cpp})
list(APPEND ATen_XPU_SYCL_SRCS ${xpu_sycl})

# Export the accumulated lists to the enclosing directory scope; this file is
# expected to be pulled in via add_subdirectory().
set(ATen_XPU_CPP_SRCS ${ATen_XPU_CPP_SRCS} PARENT_SCOPE)
set(ATen_XPU_NATIVE_CPP_SRCS ${ATen_XPU_NATIVE_CPP_SRCS} PARENT_SCOPE)
set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE)
30 changes: 29 additions & 1 deletion src/ATen/native/xpu/RangeFactories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Tensor& arange_out(

TORCH_CHECK(xstep > 0 || xstep < 0, "step must be nonzero");
TORCH_CHECK(
std::isfinite(xstart) && std::isfinite(xend),
std::isfinite(static_cast<double>(xstart)) && std::isfinite(static_cast<double>(xend)),
"unsupported range: ",
xstart,
" -> ",
Expand Down Expand Up @@ -87,5 +87,33 @@ Tensor& arange_out(

return xpu::arange_kernel(start, end, step, out);
}

// Fills `out` with the inclusive range [start, end] advancing by `step`,
// dispatching to the XPU range kernel. Resizes `out` when its element count
// differs from the computed length.
//
// Raises (via TORCH_CHECK) when step is zero/NaN, when either bound is
// non-finite, or when the bounds are inconsistent with the step's sign.
Tensor& range_xpu_out(
    const Scalar& start,
    const Scalar& end,
    const Scalar& step,
    Tensor& out) {
  // Bound/step validation is done in double precision regardless of the
  // output dtype.
  auto xstart = start.to<double>();
  auto xend = end.to<double>();
  auto xstep = step.to<double>();

  // Written as (>0 || <0) rather than (!= 0) so a NaN step is rejected too.
  TORCH_CHECK(xstep > 0 || xstep < 0, "step must be nonzero");
  // xstart/xend are already double here, so the static_cast used by the
  // arange_out variant (where the accumulate type may differ) is unnecessary.
  TORCH_CHECK(
      std::isfinite(xstart) && std::isfinite(xend),
      "unsupported range: ",
      xstart,
      " -> ",
      xend);
  TORCH_CHECK(
      ((xstep > 0) && (xend >= xstart)) || ((xstep < 0) && (xend <= xstart)),
      "upper bound and larger bound inconsistent with step sign");
  // Inclusive range: truncate((end - start) / step) + 1 elements.
  int64_t size = static_cast<int64_t>(((xend - xstart) / xstep) + 1);
  if (out.numel() != size) {
    out.resize_({size});
  }

  return at::native::xpu::range_kernel(start, end, step, out);
}

} // namespace native
} // namespace at
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/AbsKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/AbsKernel.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/AbsKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@

namespace at::native::xpu {

void abs_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void abs_kernel(TensorIteratorBase& iter);

} // namespace at::native::xpu
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationEluKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationEluKernels.h>

namespace at::native::xpu {

template <typename scalar_t, typename opmath_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationEluKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

namespace at::native::xpu {

void elu_kernel(
TORCH_XPU_API void elu_kernel(
TensorIteratorBase& iter,
const Scalar& alpha,
const Scalar& scale,
const Scalar& input_scale);

void elu_backward_kernel(
TORCH_XPU_API void elu_backward_kernel(
TensorIteratorBase& iter,
const Scalar& alpha,
const Scalar& scale,
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationGeluKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <comm/XPUMathCompat.h>
#include <comm/xpu_aten.h>

#include <ATen/native/xpu/sycl/ActivationGeluKernel.h>

namespace at {
namespace native {
namespace xpu {
Expand Down
6 changes: 4 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationGeluKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ namespace at {
namespace native {
namespace xpu {

void gelu_kernel(TensorIteratorBase& iter, c10::string_view approximate);
TORCH_XPU_API void gelu_kernel(
TensorIteratorBase& iter,
c10::string_view approximate);

void gelu_backward_kernel(
TORCH_XPU_API void gelu_backward_kernel(
TensorIteratorBase& iter,
c10::string_view approximate);

Expand Down
3 changes: 3 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationGluKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
#include <ATen/OpMathType.h>
#include <ATen/TensorIterator.h>

// Deduplicated: <ATen/native/xpu/sycl/Loops.h> was included twice (merge
// artifact).
#include <ATen/native/xpu/sycl/Loops.h>
#include <comm/SYCLContext.h>

#include <ATen/native/xpu/sycl/ActivationGluKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationGluKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

namespace at::native::xpu {

void glu_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void glu_kernel(TensorIteratorBase& iter);

void glu_backward_kernel(
TORCH_XPU_API void glu_backward_kernel(
const TensorIteratorBase& iter,
int64_t gI_stride,
int64_t I_stride);
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationHardsigmoidKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationHardsigmoidKernels.h>

namespace at::native::xpu {

template <typename scalar_t, typename opmath_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationHardsigmoidKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

namespace at::native::xpu {

void hardsigmoid_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void hardsigmoid_kernel(TensorIteratorBase& iter);

void hardsigmoid_backward_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void hardsigmoid_backward_kernel(TensorIteratorBase& iter);

} // namespace at::native::xpu
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationHardswishKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <comm/XPUMathCompat.h>
#include <comm/xpu_aten.h>

#include <ATen/native/xpu/sycl/ActivationHardswishKernels.h>

namespace at {
namespace native {
namespace xpu {
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationHardswishKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ namespace at {
namespace native {
namespace xpu {

void hardswish_kernel(TensorIterator& iter);
TORCH_XPU_API void hardswish_kernel(TensorIterator& iter);

void hardswish_backward_kernel(TensorIterator& iter);
TORCH_XPU_API void hardswish_backward_kernel(TensorIterator& iter);

} // namespace xpu
} // namespace native
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationHardtanhKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <ATen/native/xpu/sycl/Loops.h>
#include <comm/XPUMathCompat.h>

#include <ATen/native/xpu/sycl/ActivationHardtanhKernels.h>

namespace at {
namespace native {
namespace xpu {
Expand Down
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/ActivationHardtanhKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace at {
namespace native {
namespace xpu {

void hardtanh_backward_kernel(
TORCH_XPU_API void hardtanh_backward_kernel(
TensorIterator& iter,
const Scalar& min,
const Scalar& max);
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationLeakyReluKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationLeakyReluKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
6 changes: 4 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationLeakyReluKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

namespace at::native::xpu {

void leaky_relu_kernel(TensorIteratorBase& iter, const Scalar& negval_);
TORCH_XPU_API void leaky_relu_kernel(
TensorIteratorBase& iter,
const Scalar& negval_);

void leaky_relu_backward_kernel(
TORCH_XPU_API void leaky_relu_backward_kernel(
TensorIteratorBase& iter,
const Scalar& negval_);

Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationLogSigmoidKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationLogSigmoidKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationLogSigmoidKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

namespace at::native::xpu {

void log_sigmoid_forward_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void log_sigmoid_forward_kernel(TensorIteratorBase& iter);

void log_sigmoid_backward_kernel(TensorIterator& iter);
TORCH_XPU_API void log_sigmoid_backward_kernel(TensorIterator& iter);

} // namespace at::native::xpu
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationMishKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <ATen/native/xpu/sycl/Loops.h>
#include <comm/XPUMathCompat.h>

#include <ATen/native/xpu/sycl/ActivationMishKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationMishKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

namespace at::native::xpu {

void mish_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void mish_kernel(TensorIteratorBase& iter);

void mish_backward_kernel(TensorIterator& iter);
TORCH_XPU_API void mish_backward_kernel(TensorIterator& iter);

} // namespace at::native::xpu
4 changes: 3 additions & 1 deletion src/ATen/native/xpu/sycl/ActivationPreluKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationPreluKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down Expand Up @@ -40,4 +42,4 @@ void prelu_backward_kernel(TensorIterator& iter) {
});
}

} // namespace at::native::xpu
} // namespace at::native::xpu
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationPreluKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

namespace at::native::xpu {

void prelu_kernel(TensorIterator& iter);
TORCH_XPU_API void prelu_kernel(TensorIterator& iter);

void prelu_backward_kernel(TensorIterator& iter);
TORCH_XPU_API void prelu_backward_kernel(TensorIterator& iter);

} // namespace at::native::xpu
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationSiluKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <ATen/native/xpu/sycl/Loops.h>
#include <comm/XPUMathCompat.h>

#include <ATen/native/xpu/sycl/ActivationSiluKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationSiluKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

namespace at::native::xpu {

void silu_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void silu_kernel(TensorIteratorBase& iter);

void silu_backward_kernel(TensorIteratorBase& iter);
TORCH_XPU_API void silu_backward_kernel(TensorIteratorBase& iter);

} // namespace at::native::xpu
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationSoftplusKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationSoftplusKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationSoftplusKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@

namespace at::native::xpu {

void softplus_kernel(
TORCH_XPU_API void softplus_kernel(
TensorIteratorBase& iter,
const Scalar& beta_,
const Scalar& threshold_);

void softplus_backward_kernel(
TORCH_XPU_API void softplus_backward_kernel(
TensorIteratorBase& iter,
const Scalar& beta_,
const Scalar& threshold_);
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/ActivationSoftshrinkKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#include <ATen/native/xpu/sycl/Loops.h>

#include <ATen/native/xpu/sycl/ActivationSoftshrinkKernels.h>

namespace at::native::xpu {

template <typename scalar_t>
Expand Down
Loading

0 comments on commit 33cebd2

Please sign in to comment.