
Merge pull request #176: Adapt PyTorch 1.9.0 and support the C++ library.

EikanWang authored Aug 18, 2021
2 parents eb4923d + ba72bad · commit edc68c5
Showing 64 changed files with 686 additions and 410 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -89,7 +89,7 @@ torch/share/
torch/test/
torch/version.py

-intel_pytorch_extension_py/version.py
+torch_ipex/version.py
torch_ipex/csrc/version.cpp
torch_ipex/csrc/aten_ipex_sparse_type_default.*
torch_ipex/csrc/cpu/SparseOPs*
6 changes: 0 additions & 6 deletions .gitmodules
@@ -1,12 +1,6 @@
[submodule "third_party/pybind11"]
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "third_party/mkl-dnn"]
path = third_party/mkl-dnn
url = https://github.com/oneapi-src/oneDNN
[submodule "third_party/xsmm"]
path = third_party/xsmm
url = https://github.com/hfp/libxsmm.git
[submodule "third_party/torch_ccl"]
path = third_party/torch_ccl
url = https://github.com/intel/torch-ccl.git
5 changes: 1 addition & 4 deletions CMakeLists.txt
@@ -5,12 +5,11 @@ set(CMAKE_INSTALL_MESSAGE NEVER)
# set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

-set(PLUGIN_NAME _torch_ipex)
+set(PLUGIN_NAME torch_ipex)

set(RPATH_VALUE $ORIGIN)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set(CMAKE_INSTALL_RPATH "${RPATH_VALUE}/lib/")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)

set(DPCPP_ROOT "${PROJECT_SOURCE_DIR}/torch_ipex/csrc")
@@ -20,6 +19,4 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

# Common dependencies

-add_subdirectory(${DPCPP_THIRD_PARTY_ROOT}/pybind11)

include(cmake/CPU.cmake)
8 changes: 4 additions & 4 deletions README.md
@@ -20,6 +20,7 @@ Intel® Extension for PyTorch (IPEX) is a Python package to extend official PyTorch…
### Install PyTorch (Optional)
|IPEX Version|PyTorch Version|
|--|--|
+|[v1.9.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.9.0)|[v1.9.0](https://github.com/pytorch/pytorch/tree/v1.9.0 "v1.9.0")|
|[v1.8.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.8.0)|[v1.8.0](https://github.com/pytorch/pytorch/tree/v1.8.0 "v1.8.0")|
|[v1.2.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.2.0)|[v1.7.0](https://github.com/pytorch/pytorch/tree/v1.7.0 "v1.7.0")|
|[v1.1.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.1.0)|[v1.5.0-rc3](https://github.com/pytorch/pytorch/tree/v1.5.0-rc3 "v1.5.0-rc3")|
@@ -38,16 +39,15 @@ From IPEX 1.8.0, compiling PyTorch from source is not required. If you still want…
### Install IPEX via wheel file

```
-python -m pip install torch_ipex==1.8.0 -f https://software.intel.com/ipex-whl-stable
+python -m pip install torch_ipex==1.9.0 -f https://software.intel.com/ipex-whl-stable
```

:information_source: Wheel files availability for Python versions

| IPEX Version | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 |
| :--: | :--: | :--: | :--: | :--: |
+| 1.9.0 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| 1.8.0 | | :heavy_check_mark: | | |

-**Note**: Currently we only provide wheel file for Python 3.7. For other Python versions, please follow instructions in the following section to compile from source.
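As a quick post-install sanity check, here is a minimal sketch; it assumes the `intel_pytorch_extension` import name used in the Docker README below (the wheel itself is published as `torch_ipex`):

```python
# Print the PyTorch and IPEX versions to confirm the wheel installed cleanly.
import torch
import intel_pytorch_extension as ipex

print('torch:', torch.__version__, ' ipex:', ipex.__version__)
```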

### Install IPEX by compiling from source

16 changes: 6 additions & 10 deletions cmake/CPU.cmake
@@ -11,8 +11,7 @@ SET(DNNL_ENABLE_PRIMITIVE_CACHE TRUE CACHE BOOL "" FORCE)
SET(DNNL_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)

set(DPCPP_CPU_ROOT "${PROJECT_SOURCE_DIR}/torch_ipex/csrc/cpu")
-add_subdirectory(${DPCPP_THIRD_PARTY_ROOT}/mkl-dnn)
-find_package(TorchCCL REQUIRED)
+add_subdirectory(${DPCPP_THIRD_PARTY_ROOT}/mkl-dnn EXCLUDE_FROM_ALL)
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

FIND_PACKAGE(AVX)
@@ -141,9 +140,7 @@ endif()
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/torch_ipex)
include_directories(${PROJECT_SOURCE_DIR}/torch_ipex/csrc/)
-include_directories(${DPCPP_THIRD_PARTY_ROOT}/pybind11/include)
include_directories(${DPCPP_THIRD_PARTY_ROOT}/xsmm/include)
-include_directories(${TORCHCCL_INCLUDE_DIR})

# sources
set(DPCPP_SRCS)
@@ -167,9 +164,8 @@ ExternalProject_Add(xsmm
"-j"
INSTALL_COMMAND ""
)
-# Compile code with pybind11
set(DPCPP_SRCS ${DPCPP_ATEN_SRCS} ${DPCPP_COMMON_SRCS} ${DPCPP_CPU_SRCS} ${DPCPP_JIT_SRCS})
-pybind11_add_module(${PLUGIN_NAME} SHARED ${DPCPP_SRCS})
+add_library(${PLUGIN_NAME} SHARED ${DPCPP_SRCS})
target_link_libraries(${PLUGIN_NAME} PRIVATE ${DPCPP_THIRD_PARTY_ROOT}/xsmm/lib/libxsmm.a)

#link_directories(${PYTORCH_INSTALL_DIR}/lib)
@@ -188,15 +184,15 @@ else()
message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
endif()

-add_dependencies(${PLUGIN_NAME} pybind11)
add_dependencies(${PLUGIN_NAME} torch_ccl)
add_dependencies(${PLUGIN_NAME} dnnl)
target_link_libraries(${PLUGIN_NAME} PUBLIC dnnl)
add_dependencies(${PLUGIN_NAME} xsmm)
target_link_libraries(${PLUGIN_NAME} PUBLIC torch_ccl)
link_directories(${PYTORCH_INSTALL_DIR}/lib)
target_link_libraries(${PLUGIN_NAME} PUBLIC ${PYTORCH_INSTALL_DIR}/lib/libtorch_python.so)
target_link_libraries(${PLUGIN_NAME} PUBLIC ${PYTORCH_INSTALL_DIR}/lib/libtorch_cpu.so)
target_link_libraries(${PLUGIN_NAME} PUBLIC ${PYTORCH_INSTALL_DIR}/lib/libc10.so)

target_compile_options(${PLUGIN_NAME} PRIVATE "-DC10_BUILD_MAIN_LIB")

#set_property(TARGET ${PLUGIN_NAME} PROPERTY VERSION "${IPEX_VERSION}")
#set_property(TARGET ${PLUGIN_NAME} PROPERTY SOVERSION "${IPEX_VERSION}")
install(TARGETS ${PLUGIN_NAME} LIBRARY DESTINATION lib)
3 changes: 3 additions & 0 deletions cmake/Modules/FindTorchCCL.cmake
@@ -17,7 +17,10 @@ SET(TORCHCCL_INCLUDE_DIR)

SET(TORCHCCL_ROOT "${PROJECT_SOURCE_DIR}/third_party/torch_ccl")

+SET(CMAKE_INSTALL_PREFIX_SAVED "${CMAKE_INSTALL_PREFIX}")
+SET(CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX_SAVED}/../torch_ccl")
ADD_SUBDIRECTORY(${TORCHCCL_ROOT})
+SET(CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX_SAVED}")
IF(NOT TARGET torch_ccl)
MESSAGE(FATAL_ERROR "Failed to include torch_ccl target")
ENDIF()
26 changes: 16 additions & 10 deletions docker/Dockerfile
@@ -1,12 +1,12 @@
# syntax = docker/dockerfile:experimental
# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
#
# NOTE: To build this you will need a docker version > 18.06 with
# experimental enabled and DOCKER_BUILDKIT=1
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
# https://docs.docker.com/develop/develop-images/build_enhancements/

ARG BASE_IMAGE=ubuntu:20.04
@@ -26,6 +26,7 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
    libjpeg-dev \
+    pybind11-dev \
    libpng-dev \
    && rm -rf /var/lib/apt/lists/*
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
@@ -41,24 +42,29 @@ RUN curl -fsSL -v -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Mini
/opt/conda/bin/conda clean -ya

FROM dev-base AS build
-ARG IPEX_VERSION=v1.8.0
-ARG PYTORCH_VERSION=v1.8.0
+ARG IPEX_VERSION=v1.9.0
+ARG PYTORCH_VERSION=v1.9.0
+ARG TORCHVISION_VERSION=0.10.0+cpu
+ARG TORCHAUDIO_VERSION=0.9.0
COPY --from=conda /opt/conda /opt/conda
RUN --mount=type=cache,target=/opt/ccache \
-    pip3 install torch==${PYTORCH_VERSION}+cpu torchvision \
-        -f https://download.pytorch.org/whl/torch_stable.html && \
-    git clone -b ${IPEX_VERSION} --single-branch https://github.com/intel/intel-extension-for-pytorch && \
-    cd intel-extension-for-pytorch && git submodule sync && \
+    pip install torch==${PYTORCH_VERSION}+cpu torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html && \
+    git clone https://github.com/intel/intel-extension-for-pytorch && \
+    cd intel-extension-for-pytorch && \
+    git checkout ${IPEX_VERSION} && \
+    git submodule sync && \
    git submodule update --init --recursive && \
    pip3 install -r requirements.txt && \
-    pip3 install -v . && rm -rf *
+    python setup.py bdist_wheel && \
+    pip3 install dist/*.whl && \
+    cd .. && rm -rf intel-extension-for-pytorch

FROM dev-base as dev
COPY --from=build /opt/conda /opt/conda
ARG OMP_NUM_THREADS=1
ENV OMP_NUM_THREADS ${OMP_NUM_THREADS}
ARG KMP_BLOCKTIME=1
ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
ARG KMP_HW_SUBSET=1T
ENV KMP_HW_SUBSET ${KMP_HW_SUBSET}
ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
2 changes: 1 addition & 1 deletion docker/README.md
@@ -10,6 +10,6 @@

```console
$ cd $DOCKERFILE_DIR
-$ DOCKER_BUILDKIT=1 docker build --build-arg IPEX_VERSION=v1.8.0 --build-arg PYTORCH_VERSION=v1.8.0 -t intel-extension-for-pytorch:test .
+$ DOCKER_BUILDKIT=1 docker build -t intel-extension-for-pytorch:test .
$ docker run intel-extension-for-pytorch:test python -c "import torch;import intel_pytorch_extension as ipex;print('torch:', torch.__version__,' ipex:',ipex.__version__)"
```
14 changes: 0 additions & 14 deletions intel_pytorch_extension_py/ops/embeddingbag.py

This file was deleted.

52 changes: 51 additions & 1 deletion scripts/cpu/gen-dense-cpu-ops.py
@@ -32,6 +32,7 @@
'aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)',
'aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
'aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor',
+# 'aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor',
'aten::native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)',
'aten::native_batch_norm_backward(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask) -> (Tensor, Tensor, Tensor)',
'aten::avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor',
@@ -75,7 +76,7 @@
'aten::clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor',
'aten::gelu(Tensor self) -> Tensor',
'aten::gelu_backward(Tensor grad, Tensor self) -> Tensor',
-'aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=0, int? end=9223372036854775807, int step=1) -> Tensor(a)',
+'aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor(a)',
'aten::select.int(Tensor(a) self, int dim, int index) -> Tensor(a)',
'aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)',
'aten::unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]',
@@ -112,6 +113,10 @@
'aten::div.Scalar(Tensor self, Scalar other) -> Tensor',
'aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
'aten::permute(Tensor(a) self, int[] dims) -> Tensor(a)',
+'aten::to.dtype_layout(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
+'aten::to.device(Tensor self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
+'aten::to.dtype(Tensor self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
+'aten::to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
]
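For orientation, the four `aten::to` overloads registered above correspond to the usual `Tensor.to` call shapes on the Python side; a rough sketch (the tensor and argument values are arbitrary):

```python
import torch

x = torch.randn(2, 3)
x.to(torch.float64)                            # aten::to.dtype
x.to('cpu', torch.float16)                     # aten::to.device
x.to(dtype=torch.int32, layout=torch.strided)  # aten::to.dtype_layout
x.to(torch.zeros(1, dtype=torch.double))       # aten::to.other (dtype/device taken from the other tensor)
```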

_FN_IPEX_FUNCS_WITH_SIMPLE_ATEN_SIG = [
@@ -126,6 +131,41 @@
'aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
]

_FN_EXCLUDE_FUNCS_WITH_SIMPLE_ATEN_SIG = [
"aten::conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor",
"aten::conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor",
"aten::conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor",
"aten::conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding=\"valid\", int[1] dilation=1, int groups=1) -> Tensor",
"aten::conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, str padding=\"valid\", int[2] dilation=1, int groups=1) -> Tensor",
"aten::conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, str padding=\"valid\", int[3] dilation=1, int groups=1) -> Tensor",
"aten::convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor",
"aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor",
"aten::_convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor",
"aten::conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor",
"aten::conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor",
"aten::conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor",
"aten::log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor",
"aten::cross_entropy_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
"aten::log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor",
"aten::softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor",
"aten::softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor",
"aten::contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)",
"aten::flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)",
"aten::dropout(Tensor input, float p, bool train) -> Tensor",
"aten::dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)",
"aten::nll_loss_nd(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
"aten::nll_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
"aten::nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!)",
"aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor",
"aten::_batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)",
"aten::reshape(Tensor(a) self, int[] shape) -> Tensor(a)",
"aten::where.self(Tensor condition, Tensor self, Tensor other) -> Tensor",
"aten::where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor",
"aten::where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor",
"aten::where.Scalar(Tensor condition, Scalar self, Scalar other) -> Tensor",
"aten::nll_loss2d(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
]

_SHALLOW_FALLBACK_TO_CPU_TENSOR_LIST = 'shallowFallbackToCPUTensorList'
_SHALLOW_FALLBACK_TO_CPU_TENSOR = 'shallowFallbackToCPUTensor'
_SHALLOW_UPGRADE_TO_DPCPP_TENSOR = 'shallowUpgradeToDPCPPTensor'
@@ -221,6 +261,13 @@ def is_dnnl_func(self, simple_aten_sig):
                return True
        return False

    def is_exclude_func(self, simple_aten_sig):
        stripped_str = simple_aten_sig.replace(' ', '')
        for item in _FN_EXCLUDE_FUNCS_WITH_SIMPLE_ATEN_SIG:
            if stripped_str == item.replace(' ', ''):
                return True
        return False
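A standalone sketch of the whitespace-insensitive matching this helper performs; the one-entry exclude list and the `is_excluded` name are just for illustration:

```python
_EXCLUDE = [
    "aten::reshape(Tensor(a) self, int[] shape) -> Tensor(a)",
]

def is_excluded(simple_aten_sig):
    # Same idea as is_exclude_func above: strip all spaces from both
    # signatures before comparing them.
    stripped = simple_aten_sig.replace(' ', '')
    return any(stripped == item.replace(' ', '') for item in _EXCLUDE)

assert is_excluded("aten::reshape(Tensor(a) self,  int[] shape) ->Tensor(a)")
```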

def is_ipex_func(self, simple_aten_sig):
stripped_str = simple_aten_sig.replace(' ', '')
for item in _FN_IPEX_FUNCS_WITH_SIMPLE_ATEN_SIG:
@@ -580,6 +627,9 @@ def is_conv_overrideable_func(fname):

    func_defs = []
    for cpp_sig, aten_sig, native_cpp_sig, cpp_func_sig_str, aten_func_sig_str in self._sigs:
+        if self.is_exclude_func(aten_func_sig_str):
+            continue

        # The operator name should be unique because the new registration mechanism of PyTorch 1.7
        new_cpp_func_name = aten_sig.def_name.replace('.', '_')
        cpp_func_str_h, cpp_func_str_cpp = self.gen_func_signature(cpp_func_sig_str, cpp_sig.def_name, new_cpp_func_name)