
Merge pull request #176: Adapt PyTorch 1.9.0 and support the C++ library.

EikanWang authored Aug 18, 2021
2 parents eb4923d + ba72bad · commit edc68c5
Showing 64 changed files with 686 additions and 410 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -89,7 +89,7 @@ torch/share/
torch/test/
torch/version.py

-intel_pytorch_extension_py/version.py
+torch_ipex/version.py
torch_ipex/csrc/version.cpp
torch_ipex/csrc/aten_ipex_sparse_type_default.*
torch_ipex/csrc/cpu/SparseOPs*
6 changes: 0 additions & 6 deletions .gitmodules
@@ -1,12 +1,6 @@
[submodule "third_party/pybind11"]
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "third_party/mkl-dnn"]
path = third_party/mkl-dnn
url = https://github.com/oneapi-src/oneDNN
[submodule "third_party/xsmm"]
path = third_party/xsmm
url = https://github.com/hfp/libxsmm.git
[submodule "third_party/torch_ccl"]
path = third_party/torch_ccl
url = https://github.com/intel/torch-ccl.git
5 changes: 1 addition & 4 deletions CMakeLists.txt
@@ -5,12 +5,11 @@ set(CMAKE_INSTALL_MESSAGE NEVER)
# set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

-set(PLUGIN_NAME _torch_ipex)
+set(PLUGIN_NAME torch_ipex)

set(RPATH_VALUE $ORIGIN)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set(CMAKE_INSTALL_RPATH "${RPATH_VALUE}/lib/")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE)

set(DPCPP_ROOT "${PROJECT_SOURCE_DIR}/torch_ipex/csrc")
@@ -20,6 +19,4 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

# Common dependencies

-add_subdirectory(${DPCPP_THIRD_PARTY_ROOT}/pybind11)

include(cmake/CPU.cmake)
8 changes: 4 additions & 4 deletions README.md
@@ -20,6 +20,7 @@ Intel® Extension for PyTorch (IPEX) is a Python package to extend official PyTorch…
### Install PyTorch (Optional)
|IPEX Version|PyTorch Version|
|--|--|
+|[v1.9.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.9.0)|[v1.9.0](https://github.com/pytorch/pytorch/tree/v1.9.0 "v1.9.0")|
|[v1.8.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.8.0)|[v1.8.0](https://github.com/pytorch/pytorch/tree/v1.8.0 "v1.8.0")|
|[v1.2.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.2.0)|[v1.7.0](https://github.com/pytorch/pytorch/tree/v1.7.0 "v1.7.0")|
|[v1.1.0](https://github.com/intel/intel-extension-for-pytorch/tree/v1.1.0)|[v1.5.0-rc3](https://github.com/pytorch/pytorch/tree/v1.5.0-rc3 "v1.5.0-rc3")|
@@ -38,16 +39,15 @@ From IPEX 1.8.0, compiling PyTorch from source is not required. If you still want…
### Install IPEX via wheel file

```
-python -m pip install torch_ipex==1.8.0 -f https://software.intel.com/ipex-whl-stable
+python -m pip install torch_ipex==1.9.0 -f https://software.intel.com/ipex-whl-stable
```

:information_source: Wheel files availability for Python versions

| IPEX Version | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 |
| :--: | :--: | :--: | :--: | :--: |
+| 1.9.0 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| 1.8.0 | | :heavy_check_mark: | | |

-**Note**: Currently we only provide wheel file for Python 3.7. For other Python versions, please follow instructions in the following section to compile from source.
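As a quick post-install sanity check, here is a minimal sketch; it assumes the `intel_pytorch_extension` import name used in the Docker README below (the wheel itself is published as `torch_ipex`):

```python
# Print the PyTorch and IPEX versions to confirm the wheel installed cleanly.
import torch
import intel_pytorch_extension as ipex

print('torch:', torch.__version__, ' ipex:', ipex.__version__)
```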

### Install IPEX by compiling from source

16 changes: 6 additions & 10 deletions cmake/CPU.cmake
@@ -11,8 +11,7 @@ SET(DNNL_ENABLE_PRIMITIVE_CACHE TRUE CACHE BOOL "" FORCE)
SET(DNNL_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)

set(DPCPP_CPU_ROOT "${PROJECT_SOURCE_DIR}/torch_ipex/csrc/cpu")
-add_subdirectory(${DPCPP_THIRD_PARTY_ROOT}/mkl-dnn)
-find_package(TorchCCL REQUIRED)
+add_subdirectory(${DPCPP_THIRD_PARTY_ROOT}/mkl-dnn EXCLUDE_FROM_ALL)
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

FIND_PACKAGE(AVX)
@@ -141,9 +140,7 @@ endif()
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/torch_ipex)
include_directories(${PROJECT_SOURCE_DIR}/torch_ipex/csrc/)
-include_directories(${DPCPP_THIRD_PARTY_ROOT}/pybind11/include)
include_directories(${DPCPP_THIRD_PARTY_ROOT}/xsmm/include)
-include_directories(${TORCHCCL_INCLUDE_DIR})

# sources
set(DPCPP_SRCS)
@@ -167,9 +164,8 @@ ExternalProject_Add(xsmm
"-j"
INSTALL_COMMAND ""
)
-# Compile code with pybind11
set(DPCPP_SRCS ${DPCPP_ATEN_SRCS} ${DPCPP_COMMON_SRCS} ${DPCPP_CPU_SRCS} ${DPCPP_JIT_SRCS})
-pybind11_add_module(${PLUGIN_NAME} SHARED ${DPCPP_SRCS})
+add_library(${PLUGIN_NAME} SHARED ${DPCPP_SRCS})
target_link_libraries(${PLUGIN_NAME} PRIVATE ${DPCPP_THIRD_PARTY_ROOT}/xsmm/lib/libxsmm.a)

#link_directories(${PYTORCH_INSTALL_DIR}/lib)
@@ -188,15 +184,15 @@ else()
message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
endif()

-add_dependencies(${PLUGIN_NAME} pybind11)
add_dependencies(${PLUGIN_NAME} torch_ccl)
add_dependencies(${PLUGIN_NAME} dnnl)
target_link_libraries(${PLUGIN_NAME} PUBLIC dnnl)
add_dependencies(${PLUGIN_NAME} xsmm)
target_link_libraries(${PLUGIN_NAME} PUBLIC torch_ccl)
link_directories(${PYTORCH_INSTALL_DIR}/lib)
target_link_libraries(${PLUGIN_NAME} PUBLIC ${PYTORCH_INSTALL_DIR}/lib/libtorch_python.so)
target_link_libraries(${PLUGIN_NAME} PUBLIC ${PYTORCH_INSTALL_DIR}/lib/libtorch_cpu.so)
target_link_libraries(${PLUGIN_NAME} PUBLIC ${PYTORCH_INSTALL_DIR}/lib/libc10.so)

target_compile_options(${PLUGIN_NAME} PRIVATE "-DC10_BUILD_MAIN_LIB")

#set_property(TARGET ${PLUGIN_NAME} PROPERTY VERSION "${IPEX_VERSION}")
#set_property(TARGET ${PLUGIN_NAME} PROPERTY SOVERSION "${IPEX_VERSION}")
install(TARGETS ${PLUGIN_NAME} LIBRARY DESTINATION lib)
3 changes: 3 additions & 0 deletions cmake/Modules/FindTorchCCL.cmake
@@ -17,7 +17,10 @@ SET(TORCHCCL_INCLUDE_DIR)

SET(TORCHCCL_ROOT "${PROJECT_SOURCE_DIR}/third_party/torch_ccl")

+SET(CMAKE_INSTALL_PREFIX_SAVED "${CMAKE_INSTALL_PREFIX}")
+SET(CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX_SAVED}/../torch_ccl")
ADD_SUBDIRECTORY(${TORCHCCL_ROOT})
+SET(CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX_SAVED}")
IF(NOT TARGET torch_ccl)
MESSAGE(FATAL_ERROR "Failed to include torch_ccl target")
ENDIF()
26 changes: 16 additions & 10 deletions docker/Dockerfile
@@ -1,12 +1,12 @@
# syntax = docker/dockerfile:experimental
# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
#
# NOTE: To build this you will need a docker version > 18.06 with
# experimental enabled and DOCKER_BUILDKIT=1
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
# https://docs.docker.com/develop/develop-images/build_enhancements/

ARG BASE_IMAGE=ubuntu:20.04
@@ -26,6 +26,7 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
    libjpeg-dev \
+    pybind11-dev \
    libpng-dev \
    && rm -rf /var/lib/apt/lists/*
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
@@ -41,24 +42,29 @@ RUN curl -fsSL -v -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Mini
/opt/conda/bin/conda clean -ya

FROM dev-base AS build
-ARG IPEX_VERSION=v1.8.0
-ARG PYTORCH_VERSION=v1.8.0
+ARG IPEX_VERSION=v1.9.0
+ARG PYTORCH_VERSION=v1.9.0
+ARG TORCHVISION_VERSION=0.10.0+cpu
+ARG TORCHAUDIO_VERSION=0.9.0
COPY --from=conda /opt/conda /opt/conda
RUN --mount=type=cache,target=/opt/ccache \
-    pip3 install torch==${PYTORCH_VERSION}+cpu torchvision \
-        -f https://download.pytorch.org/whl/torch_stable.html && \
-    git clone -b ${IPEX_VERSION} --single-branch https://github.com/intel/intel-extension-for-pytorch && \
-    cd intel-extension-for-pytorch && git submodule sync && \
+    pip install torch==${PYTORCH_VERSION}+cpu torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html && \
+    git clone https://github.com/intel/intel-extension-for-pytorch && \
+    cd intel-extension-for-pytorch && \
+    git checkout ${IPEX_VERSION} && \
+    git submodule sync && \
    git submodule update --init --recursive && \
    pip3 install -r requirements.txt && \
-    pip3 install -v . && rm -rf *
+    python setup.py bdist_wheel && \
+    pip3 install dist/*.whl && \
+    cd .. && rm -rf intel-extension-for-pytorch

FROM dev-base as dev
COPY --from=build /opt/conda /opt/conda
ARG OMP_NUM_THREADS=1
ENV OMP_NUM_THREADS ${OMP_NUM_THREADS}
ARG KMP_BLOCKTIME=1
ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
ARG KMP_HW_SUBSET=1T
ENV KMP_HW_SUBSET ${KMP_HW_SUBSET}
ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
2 changes: 1 addition & 1 deletion docker/README.md
@@ -10,6 +10,6 @@

```console
$ cd $DOCKERFILE_DIR
-$ DOCKER_BUILDKIT=1 docker build --build-arg IPEX_VERSION=v1.8.0 --build-arg PYTORCH_VERSION=v1.8.0 -t intel-extension-for-pytorch:test .
+$ DOCKER_BUILDKIT=1 docker build -t intel-extension-for-pytorch:test .
$ docker run intel-extension-for-pytorch:test python -c "import torch;import intel_pytorch_extension as ipex;print('torch:', torch.__version__,' ipex:',ipex.__version__)"
```
14 changes: 0 additions & 14 deletions intel_pytorch_extension_py/ops/embeddingbag.py

This file was deleted.

52 changes: 51 additions & 1 deletion scripts/cpu/gen-dense-cpu-ops.py
@@ -32,6 +32,7 @@
'aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)',
'aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
'aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor',
+# 'aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor',
'aten::native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)',
'aten::native_batch_norm_backward(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask) -> (Tensor, Tensor, Tensor)',
'aten::avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor',
@@ -75,7 +76,7 @@
'aten::clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor',
'aten::gelu(Tensor self) -> Tensor',
'aten::gelu_backward(Tensor grad, Tensor self) -> Tensor',
-'aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=0, int? end=9223372036854775807, int step=1) -> Tensor(a)',
+'aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor(a)',
'aten::select.int(Tensor(a) self, int dim, int index) -> Tensor(a)',
'aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)',
'aten::unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]',
@@ -112,6 +113,10 @@
'aten::div.Scalar(Tensor self, Scalar other) -> Tensor',
'aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
'aten::permute(Tensor(a) self, int[] dims) -> Tensor(a)',
+'aten::to.dtype_layout(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
+'aten::to.device(Tensor self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
+'aten::to.dtype(Tensor self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
+'aten::to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor',
]
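For orientation, the four `aten::to` overloads registered above correspond to the usual `Tensor.to` call shapes on the Python side; a rough sketch (the tensor and argument values are arbitrary):

```python
import torch

x = torch.randn(2, 3)
x.to(torch.float64)                            # aten::to.dtype
x.to('cpu', torch.float16)                     # aten::to.device
x.to(dtype=torch.int32, layout=torch.strided)  # aten::to.dtype_layout
x.to(torch.zeros(1, dtype=torch.double))       # aten::to.other (dtype/device taken from the other tensor)
```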

_FN_IPEX_FUNCS_WITH_SIMPLE_ATEN_SIG = [
@@ -126,6 +131,41 @@
'aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)',
]

_FN_EXCLUDE_FUNCS_WITH_SIMPLE_ATEN_SIG = [
"aten::conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor",
"aten::conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor",
"aten::conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor",
"aten::conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding=\"valid\", int[1] dilation=1, int groups=1) -> Tensor",
"aten::conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, str padding=\"valid\", int[2] dilation=1, int groups=1) -> Tensor",
"aten::conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, str padding=\"valid\", int[3] dilation=1, int groups=1) -> Tensor",
"aten::convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor",
"aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor",
"aten::_convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor",
"aten::conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor",
"aten::conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor",
"aten::conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor",
"aten::log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor",
"aten::cross_entropy_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
"aten::log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor",
"aten::softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor",
"aten::softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor",
"aten::contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)",
"aten::flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)",
"aten::dropout(Tensor input, float p, bool train) -> Tensor",
"aten::dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)",
"aten::nll_loss_nd(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
"aten::nll_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
"aten::nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!)",
"aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor",
"aten::_batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)",
"aten::reshape(Tensor(a) self, int[] shape) -> Tensor(a)",
"aten::where.self(Tensor condition, Tensor self, Tensor other) -> Tensor",
"aten::where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor",
"aten::where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor",
"aten::where.Scalar(Tensor condition, Scalar self, Scalar other) -> Tensor",
"aten::nll_loss2d(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor",
]

_SHALLOW_FALLBACK_TO_CPU_TENSOR_LIST = 'shallowFallbackToCPUTensorList'
_SHALLOW_FALLBACK_TO_CPU_TENSOR = 'shallowFallbackToCPUTensor'
_SHALLOW_UPGRADE_TO_DPCPP_TENSOR = 'shallowUpgradeToDPCPPTensor'
@@ -221,6 +261,13 @@ def is_dnnl_func(self, simple_aten_sig):
                return True
        return False

    def is_exclude_func(self, simple_aten_sig):
        stripped_str = simple_aten_sig.replace(' ', '')
        for item in _FN_EXCLUDE_FUNCS_WITH_SIMPLE_ATEN_SIG:
            if stripped_str == item.replace(' ', ''):
                return True
        return False
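A standalone sketch of the whitespace-insensitive matching this helper performs; the one-entry exclude list and the `is_excluded` name are just for illustration:

```python
_EXCLUDE = [
    "aten::reshape(Tensor(a) self, int[] shape) -> Tensor(a)",
]

def is_excluded(simple_aten_sig):
    # Same idea as is_exclude_func above: strip all spaces from both
    # signatures before comparing them.
    stripped = simple_aten_sig.replace(' ', '')
    return any(stripped == item.replace(' ', '') for item in _EXCLUDE)

assert is_excluded("aten::reshape(Tensor(a) self,  int[] shape) ->Tensor(a)")
```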

def is_ipex_func(self, simple_aten_sig):
stripped_str = simple_aten_sig.replace(' ', '')
for item in _FN_IPEX_FUNCS_WITH_SIMPLE_ATEN_SIG:
@@ -580,6 +627,9 @@ def is_conv_overrideable_func(fname):

    func_defs = []
    for cpp_sig, aten_sig, native_cpp_sig, cpp_func_sig_str, aten_func_sig_str in self._sigs:
+        if self.is_exclude_func(aten_func_sig_str):
+            continue

        # The operator name should be unique because the new registration mechanism of PyTorch 1.7
        new_cpp_func_name = aten_sig.def_name.replace('.', '_')
        cpp_func_str_h, cpp_func_str_cpp = self.gen_func_signature(cpp_func_sig_str, cpp_sig.def_name, new_cpp_func_name)