From 5fdb1c91d2d26e62a7f1fe0a8d40ed90e6d3422d Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Wed, 22 Mar 2023 00:18:12 -0700
Subject: [PATCH 1/9] feat: Upgrade Pytorch and TensorRT versions

Signed-off-by: Dheeraj Peri
---
 .circleci/config.yml                      | 155 ++++++++++++------
 WORKSPACE                                 |  21 +--
 core/runtime/TRTEngine.cpp                |   2 +-
 py/ci/build_whl.sh                        |   4 +-
 py/requirements.txt                       |   8 +-
 py/setup.py                               |   2 +-
 py/torch_tensorrt/__init__.py             |   2 +-
 third_party/tensorrt/archive/BUILD        |  10 +-
 third_party/tensorrt/local/BUILD          |  10 +-
 toolchains/ci_workspaces/WORKSPACE.x86_64 |   2 +-
 .../WORKSPACE.x86_64.release.rhel         |  10 +-
 .../WORKSPACE.x86_64.release.ubuntu       |  10 +-
 12 files changed, 135 insertions(+), 101 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3a1e376405..c3ea57f0f1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -149,13 +149,13 @@ commands:
         default: "amd64"
       cuda-pkg-name:
         type: string
-        default: "cuda-toolkit-11-7"
+        default: "cuda-toolkit-11-8"
       cuda-pkg-version:
         type: string
-        default: "11-7"
+        default: "11-8"
       cuda-version:
         type: string
-        default: "11.7.1"
+        default: "11.8.0"
     steps:
       - run:
           name: Install CUDA
           command: |
             cd ~
             wget https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/cuda-<< parameters.os >>.pin
             sudo mv cuda-<< parameters.os >>.pin /etc/apt/preferences.d/cuda-repository-pin-600
-            wget https://developer.download.nvidia.com/compute/cuda/<< parameters.cuda-version >>/local_installers/cuda-repo-ubuntu2004-<< parameters.cuda-pkg-version >>-local_<< parameters.cuda-version >>-515.65.01-1_<< parameters.architecture >>.deb
-            sudo dpkg -i cuda-repo-ubuntu2004-<< parameters.cuda-pkg-version >>-local_<< parameters.cuda-version >>-515.65.01-1_<< parameters.architecture >>.deb
+            wget https://developer.download.nvidia.com/compute/cuda/<< parameters.cuda-version >>/local_installers/cuda-repo-ubuntu2004-<< parameters.cuda-pkg-version >>-local_<< parameters.cuda-version >>-520.61.05-1_<< parameters.architecture >>.deb
+            sudo dpkg -i cuda-repo-ubuntu2004-<< parameters.cuda-pkg-version >>-local_<< parameters.cuda-version >>-520.61.05-1_<< parameters.architecture >>.deb
             sudo cp /var/cuda-repo-ubuntu2004-<< parameters.cuda-pkg-version >>-local/cuda-*-keyring.gpg /usr/share/keyrings/
             sudo apt-get update
             sudo apt-get install -y cuda
@@ -188,22 +188,25 @@ commands:
         default: "amd64"
       cuda-pkg-name:
         type: string
-        default: "cuda-toolkit-11-7"
+        default: "cuda-toolkit-11-8"
      cuda-pkg-version:
         type: string
-        default: "11-7"
+        default: "11-8"
       cuda-version:
         type: string
-        default: "11.7.1"
+        default: "11.8.0"
       cuda-string-version:
         type: string
-        default: "cuda11.7"
+        default: "cuda11.8"
       cudnn-version:
         type: string
         default: "8.5.0.96"
       trt-version-short:
         type: string
-        default: "8.5.1"
+        default: "8.6.0"
+      trt-version-long:
+        type: string
+        default: "8.6.0.12-1"
       bazel-version:
         type: string
         default: "5.2.0"
@@ -211,19 +214,10 @@
         type: string
         default: "x86_64"
     steps:
-      - uninstall-cuda
-      - install-cuda:
-          os: << parameters.os >>
-          platform: << parameters.platform >>
-          architecture: << parameters.architecture >>
-          cuda-pkg-name: << parameters.cuda-pkg-name >>
-          cuda-pkg-version: << parameters.cuda-pkg-version >>
-          cuda-version: << parameters.cuda-version >>
       - install-cudnn:
           os: << parameters.os >>
          platform: << parameters.platform >>
           cudnn-version: << parameters.cudnn-version >>
-          cuda-version: << parameters.cuda-string-version >>
       - run:
           name: Install Tensorrt
           command: |
             sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/ /"
             sudo apt-get update
-            sudo apt-get install libnvinfer8=<< parameters.trt-version-short >>* libnvinfer-plugin8=<< parameters.trt-version-short>>* libnvinfer-dev=<< parameters.trt-version-short>>* libnvinfer-plugin-dev=<< parameters.trt-version-short>>*
+            sudo apt-get install libnvinfer8=<< parameters.trt-version-long >>+<< parameters.cuda-string-version >> libnvinfer-plugin8=<< parameters.trt-version-long>>+<< parameters.cuda-string-version >> libnvinfer-dev=<< parameters.trt-version-long >>+<< parameters.cuda-string-version >> libnvinfer-plugin-dev=<< parameters.trt-version-long>>+<< parameters.cuda-string-version >> libnvinfer-headers-dev=<< parameters.trt-version-long>>+<< parameters.cuda-string-version >> libnvinfer-headers-plugin-dev=<< parameters.trt-version-long>>+<< parameters.cuda-string-version >>
       - install-bazel:
           platform: << parameters.bazel-platform >>
           version: << parameters.bazel-version >>
+  setup-py-version:
+    description: "Set python version"
+    parameters:
+      python-version:
+        type: string
+        default: "3.9.4"
+    steps:
+      - run:
+          name: Set python version
+          command: |
+            pyenv install << parameters.python-version >>
+            pyenv global << parameters.python-version >>
   create-py-env:
     description: "Install python dependencies"
     parameters:
       trt-version-long:
         type: string
-        default: "8.5.1.7"
+        default: "8.6.0"
       cudnn-version-long:
         type: string
         default: "8.5.0.96"
@@ -263,16 +269,16 @@
     parameters:
       torch-build:
         type: string
-        default: "2.0.0.dev20230219+cu117"
+        default: "2.0.0"
       torch-build-index:
         type: string
-        default: "https://download.pytorch.org/whl/nightly/cu117"
+        default: "https://download.pytorch.org/whl/cu118"
     steps:
       - run:
           name: Install Torch
           command: |
             pip3 install --upgrade pip
-            pip3 install --pre torch==<< parameters.torch-build >> torchvision torchaudio --extra-index-url << parameters.torch-build-index >>
+            pip3 install torch==<< parameters.torch-build >> torchvision torchaudio --extra-index-url << parameters.torch-build-index >>

   build-py:
     description: "Build the torch-tensorrt python release (pre-cxx11-abi)"
@@ -284,6 +290,7 @@
       - run:
           name: Build torch-tensorrt python release (pre-cxx11-abi)
           command: |
+            export CUDA_HOME=/usr/local/cuda-11.8/
             mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
             cd py
             python3 -m pip install wheel setuptools
@@ -315,6 +322,7 @@
       - run:
           name: Build torch-tensorrt python release package
           command: |
+            export CUDA_HOME=/usr/local/cuda-11.8/
             cd ~/project/py
             python3 setup.py bdist_wheel --use-cxx11-abi --release
             python3 setup.py install --use-cxx11-abi --release
@@ -326,6 +334,7 @@
       - run:
           name: Build torch-tensorrt python package
           command: |
+            export CUDA_HOME=/usr/local/cuda-11.8/
             cd ~/project/py
             python3 setup.py bdist_wheel --use-cxx11-abi
             python3 setup.py install --use-cxx11-abi
@@ -339,9 +348,11 @@
         type: string
         default: "x86_64"
     steps:
+      - setup-py-version
       - run:
           name: Build torch-tensorrt python release with only the fx backend
           command: |
+            export CUDA_HOME=/usr/local/cuda-11.8/
             mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
             cd py
             python3 -m pip install wheel setuptools
@@ -361,6 +372,7 @@
         type: string
         default: nvcr.io/nvidia/pytorch:latest
     steps:
+      - setup-py-version
       - run:
           name: Log into docker
           command: |
@@ -412,7 +424,7 @@
           name: Build torch-tensorrt library with CMake
           command: |
             mkdir build
-            export PATH=$PATH:/usr/local/cuda/bin
+            export PATH=$PATH:/usr/local/cuda-11.8/bin
             ~/cmake/bin/cmake -S. -Bbuild \
               -DCMAKE_MODULE_PATH=cmake/Module \
               -DTorch_DIR=/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch/share/cmake/Torch \
@@ -463,13 +475,13 @@
       - run:
           name: Run core / C++ tests
           environment:
-            LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH"
+            LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
           command: |
             set -e
             mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
             bazel query 'kind(cc_*, tests(//tests))' --noshow_progress >> /tmp/test_manifest.txt
             circleci tests split < /tmp/test_manifest.txt > /tmp/node_test_manifest.txt
-            bazel test $(cat /tmp/node_test_manifest.txt) --test_arg=--gtest_output=xml:/tmp/artifacts/test_results/ --jobs 4 --config ci_testing --config pre_cxx11_abi --noshow_progress --test_timeout=8000
+            bazel test $(cat /tmp/node_test_manifest.txt) --test_arg=--gtest_output=xml:/tmp/artifacts/test_results/ --jobs 4 --config ci_testing --config pre_cxx11_abi --noshow_progress --test_timeout=20000
       - run:
           name: Collect logs
           when: on_fail
@@ -493,7 +505,7 @@
           environment:
             USE_HOST_DEPS: "1"
             PYT_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/"
-            LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH"
+            LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
           command: |
             set -e
             mkdir -p /tmp/artifacts/test_results
@@ -716,14 +728,18 @@ jobs:
         type: string
       torch-build-index:
         type: string
+      python-version:
+        type: string
       cxx11-abi:
         type: boolean
         default: false
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
-      resource_class: xlarge
+      image: linux-cuda-11:2023.02.1
+      resource_class: gpu.nvidia.small
     steps:
       - checkout
+      - setup-py-version:
+          python-version: << parameters.python-version >>
       - create-env:
           os: "ubuntu2004"
          platform: "x86_64"
@@ -759,6 +775,8 @@ jobs:
     parameters:
       torch-build:
         type: string
+      python-version:
+        type: string
       torch-build-index:
         type: string
       trt-version-short:
@@ -768,11 +786,13 @@ jobs:
       cudnn-version:
         type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
+      image: linux-cuda-11:2023.02.1
       resource_class: gpu.nvidia.large
     parallelism: 4
     steps:
       - checkout
+      - setup-py-version:
+          python-version: << parameters.python-version >>
       - create-env:
           os: "ubuntu2004"
           platform: "x86_64"
@@ -801,13 +821,15 @@ jobs:
         type: string
       trt-version-long:
         type: string
+      python-version:
+        type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
+      image: linux-cuda-11:2023.02.1
       resource_class: gpu.nvidia.large
     steps:
       - checkout
-      - uninstall-cuda
-      - install-cuda
+      - setup-py-version:
+          python-version: << parameters.python-version >>
       - create-py-env:
           trt-version-long: << parameters.trt-version-long >>
       - attach_workspace:
@@ -829,11 +851,15 @@ jobs:
         type: string
       trt-version-long:
         type: string
+      python-version:
+        type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
+      image: linux-cuda-11:2023.02.1
       resource_class: gpu.nvidia.large
     steps:
       - checkout
+      - setup-py-version:
+          python-version: << parameters.python-version >>
       - attach_workspace:
           at: /tmp/dist/
       - install-torch-from-index:
@@ -842,9 +868,6 @@ jobs:
       - create-py-env:
           trt-version-long: << parameters.trt-version-long >>
       - install-cudnn
-      # - run:
-      #     name: "Set LD_LIBRARY_PATH path to include the installed CUDNN"
-      #     command: export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
       - run:
           name: "Install torch-tensorrt"
           command: pip3 install --pre /tmp/dist/x86_64-linux/*cp39-cp39*.whl
@@ -860,11 +883,15 @@ jobs:
         type: string
       trt-version-long:
         type: string
+      python-version:
+        type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
+      image: linux-cuda-11:2023.02.1
       resource_class: gpu.nvidia.large
     steps:
       - checkout
+      - setup-py-version:
+          python-version: << parameters.python-version >>
       - attach_workspace:
           at: /tmp/dist/
       - install-torch-from-index:
@@ -894,7 +921,7 @@ jobs:
         type: string
     parallelism: 4
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
+      image: linux-cuda-11:2023.02.1
       resource_class: gpu.nvidia.small
     steps:
       - when:
@@ -942,8 +969,8 @@ jobs:
       torch-build-index:
         type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
-      resource_class: xlarge
+      image: linux-cuda-11:2023.02.1
+      resource_class: gpu.nvidia.small
     steps:
       - when:
          condition: << parameters.enabled >>
@@ -1075,11 +1102,15 @@ jobs:
         type: string
       torch-build-index:
         type: string
+      python-version:
+        type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
-      resource_class: xlarge
+      image: linux-cuda-11:2023.02.1
+      resource_class: gpu.nvidia.small
     steps:
       - checkout
+      - setup-py-version:
+          python-version: << parameters.python-version >>
       - create-env:
           os: "ubuntu2004"
           platform: "x86_64"
@@ -1110,8 +1141,8 @@ jobs:
       torch-base-image:
         type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
-      resource_class: xlarge
+      image: linux-cuda-11:2023.02.1
+      resource_class: gpu.nvidia.small
     steps:
       - checkout
       - build-py-ngc:
@@ -1138,8 +1169,8 @@ jobs:
       torch-base-image:
         type: string
     machine:
-      image: ubuntu-2004-cuda-11.4:202110-01
-      resource_class: xlarge
+      image: linux-cuda-11:2023.02.1
+      resource_class: gpu.nvidia.small
     steps:
       - when:
          condition: << parameters.enabled >>
@@ -1167,14 +1198,17 @@ parameters:
   bazel-version:
     type: string
     default: "5.2.0"
+  python-version:
+    type: string
+    default: "3.9.4"

# Nightly platform config
  torch-build:
    type: string
-    default: "2.0.0.dev20230219+cu117"
+    default: "2.0.0"
  torch-build-index:
    type: string
-    default: "https://download.pytorch.org/whl/nightly/cu117"
+    default: "https://download.pytorch.org/whl/cu118"
  torch-build-legacy:
    type: string
    default: "1.13.1+cu117"
@@ -1186,10 +1220,10 @@ parameters:
    default: "8.5.0.96"
  trt-version-short:
    type: string
-    default: "8.5.1"
+    default: "8.6.0"
  trt-version-long:
    type: string
-    default: "8.5.1.7"
+    default: "8.6.0"

# Jetson platform config
  torch-jetson-build:
@@ -1237,6 +1271,7 @@ workflows:
           name: build-x86_64-linux
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
+          python-version: << pipeline.parameters.python-version >>

       - test-core-cpp-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-short: << pipeline.parameters.trt-version-short >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
           cudnn-version: << pipeline.parameters.cudnn-version >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux

       - test-py-ts-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux

       - test-py-fx-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux

       - build-x86_64-linux:
           name: build-x86_64-linux-legacy
           torch-build: << pipeline.parameters.torch-build-legacy >>
           torch-build-index: << pipeline.parameters.torch-build-index-legacy >>
+          python-version: << pipeline.parameters.python-version >>

       - test-core-cpp-x86_64-linux:
           name: test-core-cpp-x86_64-linux-legacy
           torch-build: << pipeline.parameters.torch-build-legacy >>
           torch-build-index: << pipeline.parameters.torch-build-index-legacy >>
           trt-version-short: << pipeline.parameters.trt-version-short >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
           cudnn-version: << pipeline.parameters.cudnn-version >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux-legacy

       - test-py-ts-x86_64-linux:
           name: test-py-ts-x86_64-linux-legacy
           torch-build: << pipeline.parameters.torch-build-legacy >>
           torch-build-index: << pipeline.parameters.torch-build-index-legacy >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux-legacy

       - test-py-fx-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build-legacy >>
           torch-build-index: << pipeline.parameters.torch-build-index-legacy >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux-legacy

       - test-core-cpp-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-short: << pipeline.parameters.trt-version-short >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
           cudnn-version: << pipeline.parameters.cudnn-version >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - package-x86_64-linux

       - test-py-ts-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - package-x86_64-linux

       - test-py-fx-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - package-x86_64-linux

       - build-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
+          python-version: << pipeline.parameters.python-version >>

       - test-core-cpp-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-short: << pipeline.parameters.trt-version-short >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           cudnn-version: << pipeline.parameters.cudnn-version >>
           requires:
             - build-x86_64-linux

       - test-py-ts-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux

       - test-py-fx-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
+          python-version: << pipeline.parameters.python-version >>
           requires:
             - build-x86_64-linux

           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-short: << pipeline.parameters.trt-version-short >>
           cudnn-version: << pipeline.parameters.cudnn-version >>
+          python-version: << pipeline.parameters.python-version >>
+
diff --git a/WORKSPACE b/WORKSPACE
index 0d58bff297..be34aab833 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -41,14 +41,9 @@ local_repository(
 new_local_repository(
     name = "cuda",
     build_file = "@//third_party/cuda:BUILD",
-    path = "/usr/local/cuda-11.7/",
+    path = "/usr/local/cuda-11.8/",
 )

-new_local_repository(
-    name = "cublas",
-    build_file = "@//third_party/cublas:BUILD",
-    path = "/usr",
-)
#############################################################################################################
# Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs)
#############################################################################################################

@@ -56,17 +51,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "8b3b48615169c83c1b643c0efade078ea080b1da598e15fcf01bc59421f3095e",
+    sha256 = "292b3f81e7c857fc102be93e2e44c40cdb4d8ef03d98121bc6af434c66e8490b",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"],
 )

 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "aa7fd06079d260ff83c344d043fb84fbd9cf831cf375ed8b5a1b62416817af31",
+    sha256 = "f3cbd7e9593f0c64b8671d02a21d562c98b60ef1abf5898c0ee9acfbc5a6b5d2",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip"],
 )

 # Download these tarballs manually from the NVIDIA website
@@ -86,10 +81,10 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    sha256 = "39cc7f077057d1363794e8ff51c4cf21a5dbeccf1116b0020ba0dae0f3063076",
-    strip_prefix = "TensorRT-8.5.1.7",
+    sha256 = "c1732a1093c57ab79fa0b687f061be369e449c9c17792b660f3663ecd8fa7b63",
+    strip_prefix = "TensorRT-8.6.0.12",
     urls = [
-        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.5.1/tars/TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.6.0/tars/TensorRT-8.6.0.12.Linux.x86_64-gnu.cuda-11.8.tar.gz",
     ],
 )

diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
index 313209ba5a..9c6962f7e3 100644
--- a/core/runtime/TRTEngine.cpp
+++ b/core/runtime/TRTEngine.cpp
@@ -148,10 +148,10 @@
 }

 TRTEngine::~TRTEngine() {
-  rt.reset();
   trt_engine_profiler.reset();
   exec_ctx.reset();
   cuda_engine.reset();
+  rt.reset();
 }

 void TRTEngine::disable_profiling() {
diff --git a/py/ci/build_whl.sh b/py/ci/build_whl.sh
index 99b78f53f7..347a66c562 100755
--- a/py/ci/build_whl.sh
+++ b/py/ci/build_whl.sh
@@ -3,7 +3,7 @@
 # Example usage: docker run -it -v$(pwd)/..:/workspace/TRTorch build_trtorch_wheel /bin/bash /workspace/TRTorch/py/build_whl.sh

 export CXX=g++
-export CUDA_HOME=/usr/local/cuda-11.7
+export CUDA_HOME=/usr/local/cuda-11.8
 export PROJECT_DIR=/workspace/project

 cp -r $CUDA_HOME /usr/local/cuda
@@ -108,4 +108,4 @@ libtorchtrt_pre_cxx11_abi() {
     CUDNN_VERSION=$(cd ${PROJECT_DIR}/py && ${PY_DIR}/bin/python3 -c "from versions import __cudnn_version__;print(__cudnn_version__)")
     TORCH_VERSION=$(${PY_DIR}/bin/python -c "from torch import __version__;print(__version__.split('+')[0])")
     cp ${PROJECT_DIR}/bazel-bin/libtorchtrt.tar.gz ${PROJECT_DIR}/py/wheelhouse/libtorchtrt-${TORCHTRT_VERSION}-pre-cxx11-abi-cudnn${CUDNN_VERSION}-tensorrt${TRT_VERSION}-cuda${CUDA_VERSION}-libtorch${TORCH_VERSION}-x86_64-linux.tar.gz
-}
\ No newline at end of file
+}
diff --git a/py/requirements.txt b/py/requirements.txt
index d28469ba15..c06e1eeb58 100644
--- a/py/requirements.txt
+++ b/py/requirements.txt
@@ -1,7 +1,7 @@
 numpy
 pybind11==2.6.2
---extra-index-url https://download.pytorch.org/whl/nightly/cu117
-torch==2.0.0.dev20230219+cu117
-torchvision==0.15.0.dev20230219+cu117
+--extra-index-url https://download.pytorch.org/whl/nightly/cu118
+torch==2.0.0
+torchvision==0.15.1
 --extra-index-url https://pypi.ngc.nvidia.com
-tensorrt==8.5.1.7
+tensorrt==8.6.0
diff --git a/py/setup.py b/py/setup.py
index f7247a9f90..7e501b667c 100644
--- a/py/setup.py
+++ b/py/setup.py
@@ -380,7 +380,7 @@ def run(self):
     long_description=long_description,
     ext_modules=ext_modules,
     install_requires=[
-        "torch>=1.13.1",
+        "torch==2.0.0",
     ],
     setup_requires=[],
     cmdclass={
diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py
index 3261265215..6447c4d537 100644
--- a/py/torch_tensorrt/__init__.py
+++ b/py/torch_tensorrt/__init__.py
@@ -58,7 +58,7 @@ def _find_lib(name, paths):

 elif sys.platform.startswith("linux"):
     LINUX_PATHS = [
-        "/usr/local/cuda/lib64",
+        "/usr/local/cuda-11.8/lib64",
     ]

     if "LD_LIBRARY_PATH" in os.environ:
diff --git a/third_party/tensorrt/archive/BUILD b/third_party/tensorrt/archive/BUILD
index 7b4ab36def..221f2ce4b3 100644
--- a/third_party/tensorrt/archive/BUILD
+++ b/third_party/tensorrt/archive/BUILD
@@ -46,10 +46,7 @@ cc_library(
         "nvinfer_lib",
         "@cuda//:cudart",
         "@cudnn",
-    ] + select({
-        ":windows": ["@cuda//:cublas"],
-        "//conditions:default": ["@cuda//:cublas"],
-    }),
+    ],
 )

####################################################################################
@@ -186,8 +183,5 @@ cc_library(
         "nvinferplugin_lib",
         "@cuda//:cudart",
         "@cudnn",
-    ] + select({
-        ":windows": ["@cuda//:cublas"],
-        "//conditions:default": ["@cuda//:cublas"],
-    }),
+    ],
 )
diff --git a/third_party/tensorrt/local/BUILD b/third_party/tensorrt/local/BUILD
index 2c550deab4..5d0842507f 100644
--- a/third_party/tensorrt/local/BUILD
+++ b/third_party/tensorrt/local/BUILD
@@ -113,10 +113,7 @@ cc_library(
         "nvinfer_lib",
         "@cuda//:cudart",
         "@cudnn",
-    ] + select({
-        ":windows": ["@cuda//:cublas"],
-        "//conditions:default": ["@cuda//:cublas"],
-    }),
+    ],
 )

####################################################################################
@@ -370,9 +367,6 @@ cc_library(
         "nvinfer",
         "@cuda//:cudart",
         "@cudnn",
-    ] + select({
-        ":windows": ["@cuda//:cublas"],
-        "//conditions:default": ["@cuda//:cublas"],
-    }),
+    ],
     alwayslink = True,
 )
diff --git a/toolchains/ci_workspaces/WORKSPACE.x86_64 b/toolchains/ci_workspaces/WORKSPACE.x86_64
index 1caa1ce849..5103fd2f75 100644
--- a/toolchains/ci_workspaces/WORKSPACE.x86_64
+++ b/toolchains/ci_workspaces/WORKSPACE.x86_64
@@ -41,7 +41,7 @@ local_repository(
 new_local_repository(
     name = "cuda",
     build_file = "@//third_party/cuda:BUILD",
-    path = "/usr/local/cuda/",
+    path = "/usr/local/cuda-11.8/",
 )

 new_local_repository(
diff --git a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel
index 59270cad51..01d4d14bfe 100644
--- a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel
+++ b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel
@@ -41,7 +41,7 @@ local_repository(
 new_local_repository(
     name = "cuda",
     build_file = "@//third_party/cuda:BUILD",
-    path = "/usr/local/cuda-11.7",
+    path = "/usr/local/cuda-11.8",
 )

 new_local_repository(
@@ -56,17 +56,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "8b3b48615169c83c1b643c0efade078ea080b1da598e15fcf01bc59421f3095e",
+    sha256 = "292b3f81e7c857fc102be93e2e44c40cdb4d8ef03d98121bc6af434c66e8490b",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"],
 )

 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "aa7fd06079d260ff83c344d043fb84fbd9cf831cf375ed8b5a1b62416817af31",
+    sha256 = "f3cbd7e9593f0c64b8671d02a21d562c98b60ef1abf5898c0ee9acfbc5a6b5d2",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip"],
 )

####################################################################################
diff --git a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu
index aa917f460a..01d4d14bfe 100644
--- a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu
+++ b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu
@@ -41,7 +41,7 @@ local_repository(
 new_local_repository(
     name = "cuda",
     build_file = "@//third_party/cuda:BUILD",
-    path = "/usr/local/cuda",
+    path = "/usr/local/cuda-11.8",
 )

 new_local_repository(
@@ -56,17 +56,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "8b3b48615169c83c1b643c0efade078ea080b1da598e15fcf01bc59421f3095e",
+    sha256 = "292b3f81e7c857fc102be93e2e44c40cdb4d8ef03d98121bc6af434c66e8490b",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"],
 )

 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "aa7fd06079d260ff83c344d043fb84fbd9cf831cf375ed8b5a1b62416817af31",
+    sha256 = "f3cbd7e9593f0c64b8671d02a21d562c98b60ef1abf5898c0ee9acfbc5a6b5d2",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip"],
 )

####################################################################################

From 7457a30dd50d319d8a57a2c77d6dd621d65962ab Mon Sep 17 00:00:00 2001
From: gs-olive <113141689+gs-olive@users.noreply.github.com>
Date: Tue, 18 Apr 2023 10:46:42 -0700
Subject: [PATCH 2/9] fix: Remove references to _native_batch_norm_legit_no_training

- Remove references for PyTorch 2.0 stable
---
 py/torch_tensorrt/fx/passes/lower_basic_pass_aten.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/py/torch_tensorrt/fx/passes/lower_basic_pass_aten.py b/py/torch_tensorrt/fx/passes/lower_basic_pass_aten.py
index 00063c3e21..0ca4383f6e 100644
--- a/py/torch_tensorrt/fx/passes/lower_basic_pass_aten.py
+++ b/py/torch_tensorrt/fx/passes/lower_basic_pass_aten.py
@@ -165,7 +165,6 @@ def replace_aten_op_with_indices(module: torch.fx.GraphModule) -> torch.fx.Graph
             torch.ops.aten.max_pool3d_with_indices.default,
             torch.ops.aten.native_batch_norm.default,
             torch.ops.aten._native_batch_norm_legit.default,
-            torch.ops.aten._native_batch_norm_legit_no_training.default,
         ):
             modified = True
             if len(n.users) != 1:
@@ -186,16 +185,6 @@ def replace_aten_op_with_indices(module: torch.fx.GraphModule) -> torch.fx.Graph
                 new_args = list(n.args)
                 new_args.append(False)
                 new_args = tuple(new_args)
-            elif (
-                n.target == torch.ops.aten._native_batch_norm_legit_no_training.default
-            ):
-                new_op = torch.ops.aten.batch_norm
-                new_args = list(n.args)
-                new_args.append(False)
-                # _native_batch_norm_legit_no_training doesn't take in a training arg (assumed to be false)
-                # but batchnorm takes in a training arg at position 5.
-                new_args.insert(5, False)
-                new_args = tuple(new_args)

             getitem_node = next(iter(n.users))
             with module.graph.inserting_after(getitem_node):
From 5eba455d203a43ec9a61a34dc9b7030c91fbd830 Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Thu, 20 Apr 2023 01:52:06 -0700
Subject: [PATCH 3/9] chore: Apply split tests commit

Signed-off-by: Dheeraj Peri
---
 tests/core/conversion/converters/BUILD        |   45 +
 .../core/conversion/converters/test_index.cpp |  294 +++++
 .../converters/test_masked_fill.cpp           |   99 ++
 .../conversion/converters/test_reduce.cpp     |  257 +---
 .../core/conversion/converters/test_roll.cpp  |   84 ++
 .../conversion/converters/test_scatter.cpp    |   79 ++
 .../conversion/converters/test_select.cpp     | 1170 -----------------
 .../core/conversion/converters/test_slice.cpp |  332 +++++
 .../core/conversion/converters/test_split.cpp |  174 +++
 .../conversion/converters/test_unbind.cpp     |   88 ++
 .../conversion/converters/test_unpack.cpp     |  243 ++++
 .../core/conversion/converters/test_where.cpp |   68 +
 12 files changed, 1538 insertions(+), 1395 deletions(-)
 create mode 100644 tests/core/conversion/converters/test_index.cpp
 create mode 100644 tests/core/conversion/converters/test_masked_fill.cpp
 create mode 100644 tests/core/conversion/converters/test_roll.cpp
 create mode 100644 tests/core/conversion/converters/test_scatter.cpp
 create mode 100644 tests/core/conversion/converters/test_slice.cpp
 create mode 100644 tests/core/conversion/converters/test_split.cpp
 create mode 100644 tests/core/conversion/converters/test_unbind.cpp
 create mode 100644 tests/core/conversion/converters/test_unpack.cpp
 create mode 100644 tests/core/conversion/converters/test_where.cpp

diff --git a/tests/core/conversion/converters/BUILD b/tests/core/conversion/converters/BUILD
index 901ca94998..a8c57b1b41 100644
--- a/tests/core/conversion/converters/BUILD
+++ b/tests/core/conversion/converters/BUILD
@@ -95,6 +95,10 @@ converter_test(
     name = "test_matrix_multiply",
 )

+converter_test(
+    name = "test_masked_fill",
+)
+
 converter_test(
     name = "test_max",
 )
@@ -115,6 +119,10 @@ converter_test(
     name = "test_reduce",
 )

+converter_test(
+    name = "test_roll",
+)
+
 converter_test(
     name = "test_reflection_pad",
 )
@@ -123,6 +131,10 @@ converter_test(
     name = "test_replication_pad",
 )

+converter_test(
+    name = "test_scatter",
+)
+
 converter_test(
     name = "test_shuffle",
 )
@@ -139,6 +151,10 @@ converter_test(
     name = "test_interpolate",
 )

+converter_test(
+    name = "test_index",
+)
+
 converter_test(
     name = "test_select",
 )
@@ -147,6 +163,14 @@ converter_test(
     name = "test_stack",
 )

+converter_test(
+    name = "test_slice",
+)
+
+converter_test(
+    name = "test_split",
+)
+
 converter_test(
     name = "test_topk",
 )
@@ -159,10 +183,22 @@ converter_test(
     name = "test_unsqueeze",
 )

+converter_test(
+    name = "test_unbind",
+)
+
+converter_test(
+    name = "test_unpack",
+)
+
 converter_test(
     name = "test_squeeze",
 )

+converter_test(
+    name = "test_where",
+)
+
 test_suite(
     name = "converter_tests",
     tests = [
@@ -185,22 +221,31 @@ test_suite(
         ":test_expand",
         ":test_instance_norm",
         ":test_interpolate",
+        ":test_index",
         ":test_layer_norm",
         ":test_linear",
         ":test_lstm_cell",
         ":test_matrix_multiply",
+        ":test_masked_fill",
         ":test_max",
         ":test_normalize",
         ":test_pooling",
         ":test_reduce",
+        ":test_roll",
         ":test_replication_pad",
+        ":test_scatter",
         ":test_select",
         ":test_shuffle",
         ":test_softmax",
         ":test_squeeze",
         ":test_stack",
+        ":test_split",
+        ":test_slice",
         ":test_topk",
         ":test_unary",
         ":test_unsqueeze",
+        ":test_unbind",
+        ":test_unpack",
+        ":test_where",
     ],
 )
diff --git a/tests/core/conversion/converters/test_index.cpp b/tests/core/conversion/converters/test_index.cpp
new file mode 100644
index 0000000000..34e50f2abd
--- /dev/null
+++ b/tests/core/conversion/converters/test_index.cpp
@@ -0,0 +1,294 @@
+#include <string>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, ATenIndexSelectConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%0 : Tensor, %index : Int (2)):
+        %2 : int = prim::Constant[value=0]()
+        %3 : Tensor = aten::index_select(%0, %2, %index)
+        return (%3))IR";
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+  auto in = at::randint(1, 10, {4, 4, 4}, {at::kCUDA});
+  auto index = at::randint(0, 4, {2}, {at::kCUDA}).to(torch::kI32);
+
+  auto jit_in = at::clone(in);
+  auto jit_index = at::clone(index);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {jit_index});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_index = at::clone(index);
+  auto trt_params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {trt_index});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, trt_params, {trt_in});
+
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenIndexSelectNegativeDimConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%0 : Tensor, %index : Int (5)):
+        %2 : int = prim::Constant[value=-1]()
+        %3 : Tensor = aten::index_select(%0, %2, %index)
+        return (%3))IR";
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {5, 3, 9}, {at::kCUDA});
+  auto index = at::randint(0, 9, {5}, {at::kCUDA}).to(torch::kI32);
+
+  auto jit_in = at::clone(in);
+  auto jit_index = at::clone(index);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {jit_index});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_index = at::clone(index);
+  auto trt_params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {trt_index});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, trt_params, {trt_in});
+
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorOneIndiceConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index : Tensor):
+        %18 : Tensor?[] = prim::ListConstruct(%index)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {5, 10}, {at::kCUDA});
+  auto in2 = at::full({2}, 4, {at::kCUDA});
+  auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA);
+  auto in2_trt = at::full({2}, 4, {options});
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, in2});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, in2_trt});
+
+  ASSERT_TRUE(
+      torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorFullIndicesConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor,
+            %index1 : Tensor,
+            %index2 : Tensor):
+        %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %index2)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA});
+  auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong);
+  auto index1 = at::tensor({1, 3, 4, 6}, {at::kCUDA}).to(torch::kLong);
+  auto index2 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong);
+  auto index0_trt = index0.to(torch::kInt32);
+  auto index1_trt = index1.to(torch::kInt32);
+  auto index2_trt = index2.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1, index2});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt, index2_trt});
+
+  ASSERT_TRUE(
+      torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorRepeatedFullIndicesConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor,
+            %index1 : Tensor,
+            %index2 : Tensor):
+        %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %index2)
+        %19 : Tensor = aten::index(%x.1, %18)
+        %20 : Tensor = aten::index(%x.1, %18)
+        return (%19, %20))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA});
+  auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong);
+  auto index1 = at::tensor({1, 3, 4, 6}, {at::kCUDA}).to(torch::kLong);
+  auto index2 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong);
+  auto index0_trt = index0.to(torch::kInt32);
+  auto index1_trt = index1.to(torch::kInt32);
+  auto index2_trt = index2.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1, index2});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt, index2_trt});
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[1], trt_results[1], 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorIdx0Idx1NoneConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor,
+            %index1 : Tensor):
+        %5 : NoneType = prim::Constant()
+        %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %5)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA});
+  auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong);
+  auto index1 = at::tensor({1, 3, 4, 6}, {at::kCUDA}).to(torch::kLong);
+  auto index0_trt = index0.to(torch::kInt32);
+  auto index1_trt = index1.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt});
+  LOG_DEBUG(trt_results);
+
+  ASSERT_TRUE(
+      torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorIdx0NoneIdx1ConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor,
+            %index1 : Tensor):
+        %5 : NoneType = prim::Constant()
+        %18 : Tensor?[] = prim::ListConstruct(%index0, %5, %index1)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA});
+  auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong);
+  auto index1 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong);
+  auto index0_trt = index0.to(torch::kInt32);
+  auto index1_trt = index1.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt});
+
+  ASSERT_TRUE(
+      torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorNoneIdx0Idx1ConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor,
+            %index1 : Tensor):
+        %5 : NoneType = prim::Constant()
+        %18 : Tensor?[] = prim::ListConstruct(%5, %index0, %index1)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA});
+  auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong);
+  auto index1 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong);
+  auto index0_trt = index0.to(torch::kInt32);
+  auto index1_trt = index1.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt});
+
+  ASSERT_TRUE(
+      torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorIdxsNoneConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor,
+            %index1 : Tensor,
+            %index2 : Tensor):
+        %5 : NoneType = prim::Constant()
+        %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %index2, %5)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {4, 8, 8, 4}, {at::kCUDA});
+  auto index0 = at::full({4, 13, 1}, 1, {at::kCUDA}).to(torch::kLong);
+  auto index1 = at::full({4, 13, 1}, 2, {at::kCUDA}).to(torch::kLong);
+  auto index2 = at::full({4, 13, 1}, 3, {at::kCUDA}).to(torch::kLong);
+  auto index0_trt = index0.to(torch::kInt32);
+  auto index1_trt = index1.to(torch::kInt32);
+  auto index2_trt = index2.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1, index2});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt, index2_trt});
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, ATenIndexTensorNoneIdx1ConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor,
+            %index0 : Tensor):
+        %5 : NoneType = prim::Constant()
+        %18 : Tensor?[] = prim::ListConstruct(%5, %index0)
+        %19 : Tensor = aten::index(%x.1, %18)
+        return (%19))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto in1 = at::randint(1, 10, {1, 3, 480, 928}, {at::kCUDA});
+  auto index0 = at::tensor({2, 1, 0}, {at::kCUDA}).to(torch::kLong);
+
+  auto index0_trt = index0.to(torch::kInt32);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0});
+
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt});
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
\ No newline at end of file
diff --git a/tests/core/conversion/converters/test_masked_fill.cpp b/tests/core/conversion/converters/test_masked_fill.cpp
new file mode 100644
index 0000000000..518b31dc02
--- /dev/null
+++ b/tests/core/conversion/converters/test_masked_fill.cpp
@@ -0,0 +1,99 @@
+#include <string>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
"gtest/gtest.h" +#include "tests/util/util.h" +#include "torch/csrc/jit/ir/irparser.h" + +TEST(Converters, ATenMaskedFillZerosConvertsCorrectly) { + const auto graph = R"IR( + graph(%x.1 : Tensor): + %44 : Device = prim::Constant[value="cuda"]() + %8 : bool = prim::Constant[value=0]() + %7 : None = prim::Constant() + %f32_dtype: int = prim::Constant[value=11]() + %1 : int = prim::Constant[value=0]() # bert.py:5:26 + %2 : int = prim::Constant[value=1]() # bert.py:5:32 + %33 : int = prim::Constant[value=2]() # bert.py:6:31 + %3 : int[] = prim::ListConstruct(%1, %1, %2) + %4 : int[] = prim::ListConstruct(%2, %2, %1) + %5 : int[][] = prim::ListConstruct(%3, %4) + %9 : Tensor = aten::tensor(%5, %f32_dtype, %7, %8) # bert.py:5:11 + %mask.1 : Tensor = aten::to(%9, %44, %7, %8, %8) # bert.py:5:11 + %mask.2 : Tensor = trt::const(%mask.1) + %34 : Tensor = aten::masked_fill(%x.1, %mask.1, %33) # bert.py:6:11 + return (%34, %mask.2))IR"; + + auto g = std::make_shared(); + + torch::jit::parseIR(graph, &*g); + + auto in = at::zeros({1, 2, 3}, {at::kCUDA}); + + auto jit_in = at::clone(in); + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); + + auto trt_in = at::clone(in); + torch_tensorrt::core::lowering::passes::RemoveNOPs(g); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); + + ASSERT_TRUE( + torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); +} + +TEST(Converters, ATenMaskedFillMixedTypesFloatIntConvertsCorrectly) { + const auto graph = R"IR( + graph(%x.1 : Tensor, %x.2 : Tensor): + %val : float = prim::Constant[value=4.0]() + %out : Tensor = aten::masked_fill(%x.1, %x.2, %val) + return (%out))IR"; + + auto g = std::make_shared(); + + torch::jit::parseIR(graph, &*g); + + // Input is a float tensor, filled with an int --> expecting float tensor out + auto in1 = at::rand({2, 3, 5, 7}, {at::kCUDA}).to(torch::kFloat32); + auto in2 = (2 * at::rand({2, 3, 5, 7}, {at::kCUDA})).to(torch::kBool); + + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, in2}); + + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, in2}); + + ASSERT_TRUE( + torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); + + // Ensure data types match in outputs + ASSERT_TRUE(jit_results[0].dtype() == trt_results[0].dtype()); +} + +TEST(Converters, ATenMaskedFillMixedTypesIntFloatConvertsCorrectly) { + const auto graph = R"IR( + graph(%x.1 : Tensor, %x.2 : Tensor): + %val : int = prim::Constant[value=4]() + %out : Tensor = aten::masked_fill(%x.1, %x.2, %val) + return (%out))IR"; + + auto g = std::make_shared(); + + torch::jit::parseIR(graph, &*g); + + // Input is an integer tensor, filled with a float --> expecting integer tensor out + auto in1 = at::rand({1, 3, 5, 7}, {at::kCUDA}).to(torch::kInt32); + auto in2 = (2 * at::rand({1, 3, 5, 7}, {at::kCUDA})).to(torch::kBool); + + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, in2}); + + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, 
in2}); + + ASSERT_TRUE( + torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); + + // Ensure data types match in outputs + ASSERT_TRUE(jit_results[0].dtype() == trt_results[0].dtype()); +} \ No newline at end of file diff --git a/tests/core/conversion/converters/test_reduce.cpp b/tests/core/conversion/converters/test_reduce.cpp index 40835a8dea..87ad482a26 100644 --- a/tests/core/conversion/converters/test_reduce.cpp +++ b/tests/core/conversion/converters/test_reduce.cpp @@ -344,239 +344,46 @@ TEST(Converters, ATenAnyDimNegIndexConvertsCorrectly) { test_body(graph, in); } -TEST(Converters, UnpackVarLowersCorrectly) { +TEST(Converters, ATenAllDimConvertsCorrectly) { const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::var(%x.1, %6, %5, %4) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, UnpackVarKeepDimsLowersCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::var(%x.1, %6, %5, %5) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, UnpackVarUnbiasedLowersCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::var(%x.1, %6, %4, %4) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = 
at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, UnpackVarUnbiasedKeepDimsLowersCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::var(%x.1, %6, %4, %5) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, UnpackStdLowersCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::std(%x.1, %6, %5, %4) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackStd(g); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, UnpackStdKeepDimsLowersCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::std(%x.1, %6, %5, %5) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackStd(g); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - 
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); + graph(%0 : Tensor): + %1 : int = prim::Constant[value=-1]() + %3 : bool = prim::Constant[value=0]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::randint(0, 2, {64, 2}, at::kCUDA); + test_body(graph, in); } -TEST(Converters, UnpackStdUnbiasedLowersCorrectly) { +TEST(Converters, ATenAllDimKeepDimConvertsCorrectly) { const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %6 : int[] = prim::ListConstruct(%3) - %7 : Tensor = aten::std(%x.1, %6, %4, %4) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackStd(g); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); + graph(%0 : Tensor): + %1 : int = prim::Constant[value=0]() + %3 : bool = prim::Constant[value=1]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::randint(-2, 2, {2, 32}, at::kCUDA).to(torch::kBool); + test_body(graph, in); } -TEST(Converters, UnpackStdUnbiasedKeepDimsLowersCorrectly) { +TEST(Converters, ATenAllDimAllTrueConvertsCorrectly) { const auto graph = R"IR( - graph(%x.1 : Tensor): - %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65 - %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50 - %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39 - %one : int = prim::Constant[value=1]() - %6 : int[] = prim::ListConstruct(%3, %one) - %7 : Tensor = aten::std(%x.1, %6, %4, %5) # test_zeros.py:10:26 - return (%7))IR"; - - auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA); - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackStd(g); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); + graph(%0 : Tensor): + %1 : int = prim::Constant[value=1]() + %3 : bool = prim::Constant[value=0]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::ones({2, 32}, at::kCUDA); + test_body(graph, in); } -TEST(Converters, UnpackVarUnbiasedNegAxisLowersCorrectly) { +TEST(Converters, ATenAllDimDynamicConvertsCorrectly) { const auto graph = R"IR( - graph(%x.1 : Tensor): - %37 : bool = prim::Constant[value=1]() - %53 : int[] = prim::Constant[value=[-1]]() - %69 : Tensor = aten::var(%x.1, %53, %37, %37) - return 
(%69))IR"; - - auto in = at::randint(-5, 5, {2, 20, 768}, at::kCUDA).to(at::kFloat); - - auto jit_in = at::clone(in); - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - in = at::clone(in); - torch_tensorrt::core::lowering::passes::UnpackVar(g); - torch::jit::EliminateCommonSubexpression(g); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {jit_in}); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); + graph(%0 : Tensor): + %1 : int = prim::Constant[value=-1]() + %3 : bool = prim::Constant[value=0]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::randint(0, 2, {64, 2}, at::kCUDA).to(torch::kHalf); + test_body(graph, in, true); } diff --git a/tests/core/conversion/converters/test_roll.cpp b/tests/core/conversion/converters/test_roll.cpp new file mode 100644 index 0000000000..693fd47aef --- /dev/null +++ b/tests/core/conversion/converters/test_roll.cpp @@ -0,0 +1,84 @@ +#include +#include "core/compiler.h" +#include "core/lowering/passes/passes.h" +#include "gtest/gtest.h" +#include "tests/util/util.h" +#include "torch/csrc/jit/ir/irparser.h" + +TEST(Converters, ATenRollConvertsCorrectly) { + const auto graph = R"IR( + graph(%1 : Tensor): + %2 : int[] = prim::Constant[value=[1, 0, 3, 7]]() + %3 : int[] = prim::Constant[value=[0, 1, 2, 3]]() + %4 : Tensor = aten::roll(%1, %2, %3) + return (%4))IR"; + + auto g = std::make_shared(); + + torch::jit::parseIR(graph, g.get()); + + // Run Pytorch + auto in = at::randint(1, 10, {2, 3, 4, 5}, {at::kCUDA}); + + auto jit_in = at::clone(in); + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); + + auto trt_in = at::clone(in); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); + auto trt = trt_results[0].reshape(jit_results[0].sizes()); + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); +} + +TEST(Converters, ATenRollShiftsNegativeConvertsCorrectly) { + const auto graph = R"IR( + graph(%1 : Tensor): + %2 : int[] = prim::Constant[value=[0, -3, -3]]() + %3 : int[] = prim::Constant[value=[1, 2, 3]]() + %4 : Tensor = aten::roll(%1, %2, %3) + return (%4))IR"; + + auto g = std::make_shared(); + + torch::jit::parseIR(graph, g.get()); + + // Run Pytorch + auto in = at::randint(1, 10, {1, 3, 4, 5}, {at::kCUDA}); + + auto jit_in = at::clone(in); + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); + + auto trt_in = at::clone(in); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); + auto trt = trt_results[0].reshape(jit_results[0].sizes()); + + ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); +} + +TEST(Converters, ATenRollDimsNegativeConvertsCorrectly) { + const auto graph = R"IR( + graph(%1 : Tensor): + %2 : int[] = prim::Constant[value=[0, -3, -3]]() + %3 : int[] = prim::Constant[value=[1, 2, -1]]() + %4 : Tensor = aten::roll(%1, %2, %3) + return (%4))IR"; + + auto g = std::make_shared(); + + torch::jit::parseIR(graph, g.get()); + + // Run Pytorch + auto in = 
+TEST(Converters, ATenRollShiftsNegativeConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%1 : Tensor):
+        %2 : int[] = prim::Constant[value=[0, -3, -3]]()
+        %3 : int[] = prim::Constant[value=[1, 2, 3]]()
+        %4 : Tensor = aten::roll(%1, %2, %3)
+        return (%4))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  // Run Pytorch
+  auto in = at::randint(1, 10, {1, 3, 4, 5}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenRollDimsNegativeConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%1 : Tensor):
+        %2 : int[] = prim::Constant[value=[0, -3, -3]]()
+        %3 : int[] = prim::Constant[value=[1, 2, -1]]()
+        %4 : Tensor = aten::roll(%1, %2, %3)
+        return (%4))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  // Run Pytorch
+  auto in = at::randint(1, 10, {1, 3, 4, 5}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
\ No newline at end of file
diff --git a/tests/core/conversion/converters/test_scatter.cpp b/tests/core/conversion/converters/test_scatter.cpp
new file mode 100644
index 0000000000..b7d0883249
--- /dev/null
+++ b/tests/core/conversion/converters/test_scatter.cpp
@@ -0,0 +1,79 @@
+#include <torch/torch.h>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, ScatterValueConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%data : Tensor,
+            %index.1 : Tensor):
+        %value : int = prim::Constant[value=100]()
+        %dim : int = prim::Constant[value=1]()
+        %5 : NoneType = prim::Constant()
+        %6 : bool = prim::Constant[value=0]()
+        %7 : int = prim::Constant[value=4]()
+        %index : Tensor = aten::to(%index.1, %7, %6, %6, %5)
+        %10 : Tensor = aten::scatter(%data, %dim, %index, %value)
+        return (%10))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto index = at::randint(0, 5, {2, 2}, {at::kCUDA});
+  auto data = at::randn({5, 5}, {at::kCUDA});
+
+  auto jit_index = at::clone(index);
+  auto jit_data = at::clone(data);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_data, jit_index});
+
+  auto trt_index = at::clone(index);
+  auto trt_data = at::clone(data);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_data, trt_index});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ScatterSrcConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%data : Tensor,
+            %src : Tensor,
+            %index.1 : Tensor):
+        %dim : int = prim::Constant[value=1]()
+        %5 : NoneType = prim::Constant()
+        %6 : bool = prim::Constant[value=0]()
+        %7 : int = prim::Constant[value=4]()
+        %index : Tensor = aten::to(%index.1, %7, %6, %6, %5)
+        %10 : Tensor = aten::scatter(%data, %dim, %index, %src)
+        return (%10))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto index = at::randint(0, 4, {2, 2}, {at::kCUDA});
+  auto data = at::randn({5, 5}, {at::kCUDA});
+  auto src = at::randn({2, 2}, {at::kCUDA});
+
+  auto jit_index = at::clone(index);
+  auto jit_data = at::clone(data);
+  auto jit_src = at::clone(src);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_data, jit_src, jit_index});
+
+  auto trt_index = at::clone(index);
+  auto trt_data = at::clone(data);
+  auto trt_src = at::clone(src);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_data, trt_src, trt_index});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
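+    // Shapes of the TRT and JIT outputs are not guaranteed to match exactly,
+    // so the reshape above normalizes them before the elementwise comparison
+    // (absolute tolerance 2e-6).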
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); + } +} \ No newline at end of file diff --git a/tests/core/conversion/converters/test_select.cpp b/tests/core/conversion/converters/test_select.cpp index d93dd5b2c5..d2af33f099 100644 --- a/tests/core/conversion/converters/test_select.cpp +++ b/tests/core/conversion/converters/test_select.cpp @@ -165,60 +165,6 @@ TEST(Converters, ATenSelectEmptyTensorConvertsCorrectly) { ASSERT_TRUE(torch_tensorrt::tests::util::sameShape(jit_results[0], trt_results[0])); } -TEST(Converters, ATenIndexSelectConvertsCorrectly) { - const auto graph = R"IR( - graph(%0 : Tensor, %index : Int (2)): - %2 : int = prim::Constant[value=0]() - %3 : Tensor = aten::index_select(%0, %2, %index) - return (%3))IR"; - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - auto in = at::randint(1, 10, {4, 4, 4}, {at::kCUDA}); - auto index = at::randint(0, 4, {2}, {at::kCUDA}).to(torch::kI32); - - auto jit_in = at::clone(in); - auto jit_index = at::clone(index); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {jit_index}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_index = at::clone(index); - auto trt_params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {trt_index}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, trt_params, {trt_in}); - - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenIndexSelectNegativeDimConvertsCorrectly) { - const auto graph = R"IR( - graph(%0 : Tensor, %index : Int (5)): - %2 : int = prim::Constant[value=-1]() - %3 : Tensor = aten::index_select(%0, %2, %index) - return (%3))IR"; - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {5, 3, 9}, {at::kCUDA}); - auto index = at::randint(0, 9, {5}, {at::kCUDA}).to(torch::kI32); - - auto jit_in = at::clone(in); - auto jit_index = at::clone(index); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {jit_index}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_index = at::clone(index); - auto trt_params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {trt_index}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, trt_params, {trt_in}); - - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - TEST(Converters, ATenNarrowStartScalarConvertsCorrectly) { const auto graph = R"IR( graph(%x.1 : Tensor): @@ -273,1119 +219,3 @@ TEST(Converters, ATenEmbeddingConvertsCorrectly) { ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); } - -TEST(Converters, ATenRollConvertsCorrectly) { - const auto graph = R"IR( - graph(%1 : Tensor): - %2 : int[] = prim::Constant[value=[1, 0, 3, 7]]() - %3 : int[] = prim::Constant[value=[0, 1, 2, 3]]() - %4 : Tensor = aten::roll(%1, %2, %3) - return (%4))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - // Run Pytorch - auto in = at::randint(1, 10, {2, 3, 4, 5}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, 
{jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenRollShiftsNegativeConvertsCorrectly) { - const auto graph = R"IR( - graph(%1 : Tensor): - %2 : int[] = prim::Constant[value=[0, -3, -3]]() - %3 : int[] = prim::Constant[value=[1, 2, 3]]() - %4 : Tensor = aten::roll(%1, %2, %3) - return (%4))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - // Run Pytorch - auto in = at::randint(1, 10, {1, 3, 4, 5}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenRollDimsNegativeConvertsCorrectly) { - const auto graph = R"IR( - graph(%1 : Tensor): - %2 : int[] = prim::Constant[value=[0, -3, -3]]() - %3 : int[] = prim::Constant[value=[1, 2, -1]]() - %4 : Tensor = aten::roll(%1, %2, %3) - return (%4))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - // Run Pytorch - auto in = at::randint(1, 10, {1, 3, 4, 5}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %3 : int = prim::Constant[value=2]() - %4 : int = prim::Constant[value=4]() - %5 : int = prim::Constant[value=1]() - %6 : int = prim::Constant[value=0]() - %7 : Tensor = aten::select(%x.1, %6, %6) - %8 : Tensor = aten::select(%7, %6, %5) - %9 : Tensor = aten::slice(%8, %6, %5, %4, %3) - %10 : Tensor = aten::slice(%9, %5, %2, %2, %5) - return (%10))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {1, 3, 5, 5}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceNegStartIndexConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : int = prim::Constant[value=1]() - %3 : int = prim::Constant[value=9223372036854775807]() - %4 : int = prim::Constant[value=-2]() - %5 : int = prim::Constant[value=0]() - %6 : Tensor = aten::slice(%x.1, %5, %4, %3, %2) - %7 : Tensor = aten::slice(%6, %2, %5, %3, %2) - return (%7))IR"; - - auto g = 
std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {6, 3}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceNegEndIndexConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : int = prim::Constant[value=3]() - %3 : int = prim::Constant[value=9223372036854775807]() - %4 : int = prim::Constant[value=2]() - %5 : int = prim::Constant[value=-3]() - %6 : int = prim::Constant[value=1]() - %7 : int = prim::Constant[value=-2]() - %8 : int = prim::Constant[value=0]() - %9 : Tensor = aten::slice(%x.1, %8, %8, %7, %6) - %10 : Tensor = aten::slice(%9, %6, %8, %5, %6) - %11 : Tensor = aten::slice(%10, %4, %8, %3, %6) - %12 : Tensor = aten::slice(%11, %2, %8, %3, %6) - return (%12))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {6, 5, 3, 3}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceListConvertsCorrectly) { - const auto graph = R"IR( - graph(%x : Tensor): - %1 : NoneType = prim::Constant() - %2 : int = prim::Constant[value=2]() - %3 : int = prim::Constant[value=1]() - %4 : int = prim::Constant[value=3]() - %list : Tensor[] = aten::unbind(%x, %4) - %slice : Tensor[] = aten::slice(%list, %1, %2, %3) - %out.1 : Tensor, %out.2 : Tensor = prim::ListUnpack(%slice) - return (%out.1, %out.2))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in_x = at::randint(1, 10, {6, 5, 3, 3}, {at::kCUDA}); - - auto jit_in_x = at::clone(in_x); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in_x}); - - auto trt_in_x = at::clone(in_x); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in_x}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenSliceDynamicBatchConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %dim : int = prim::Constant[value=0]() - %start : int = prim::Constant[value=1]() - %end : int = prim::Constant[value=15]() - %step : int = prim::Constant[value=2]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto 
jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in batch - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceDynamicBatchLargeEndConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %dim : int = prim::Constant[value=0]() - %start : int = prim::Constant[value=1]() - %end : int = prim::Constant[value=9223372036854775807]() - %step : int = prim::Constant[value=2]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in batch - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceDynamicNegStartBatchConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %dim : int = prim::Constant[value=0]() - %start : int = prim::Constant[value=-15]() - %end : int = prim::Constant[value=15]() - %step : int = prim::Constant[value=2]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in batch - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceDynamicNegEndBatchConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %dim : int = prim::Constant[value=0]() - %start : int = prim::Constant[value=1]() - %end : int = prim::Constant[value=-2]() - %step : int = prim::Constant[value=3]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in batch - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, 
ATenSliceDynamicNoneBatchConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %dim : int = prim::Constant[value=0]() - %start : None = prim::Constant() - %end : None = prim::Constant() - %step : int = prim::Constant[value=3]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in batch - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceDynamicConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %dim : int = prim::Constant[value=1]() - %start : int = prim::Constant[value=3]() - %end : int = prim::Constant[value=32]() - %step : int = prim::Constant[value=3]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in dim 1, slice in dim 1 - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, false); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSliceDynamic2ConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %dim : int = prim::Constant[value=1]() - %start : int = prim::Constant[value=3]() - %end : int = prim::Constant[value=17]() - %step : int = prim::Constant[value=3]() - %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step) - return (%9))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {16, 32}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - // dynamic shape in batch, slice in dim 1 - auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true); - auto trt = trt_results[0].reshape(jit_results[0].sizes()); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6)); -} - -TEST(Converters, ATenSplitSizesInScriptingConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : int[] = prim::Constant[value=[1, 2]]() - %3 : int = prim::Constant[value=1]() - %4 : Tensor[] = aten::split(%x.1, %2, %3) - %x1.1 : Tensor, %x2.1 : Tensor = prim::ListUnpack(%4) - return (%x1.1, %x2.1))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = 
torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenSplitSizesinTracingConvertsCorrectly) { - const auto graph = R"IR( - graph(%argument_1.1 : Tensor): - %2 : int[] = prim::Constant[value=[1, 2]]() - %3 : int = prim::Constant[value=1]() - %4 : Tensor[] = aten::split_with_sizes(%argument_1.1, %2, %3) - %5 : Tensor, %6 : Tensor = prim::ListUnpack(%4) - return (%5, %6))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenSplitFixedConvertsCorrectly) { - const auto graph = R"IR( - graph(%argument_1.1 : Tensor): - %2 : int = prim::Constant[value=1]() - %3 : Tensor[] = aten::split(%argument_1.1, %2, %2) - %4 : Tensor, %5 : Tensor, %6 : Tensor = prim::ListUnpack(%3) - return (%4, %5, %6))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenSplitFixedHasRemainderConvertsCorrectly) { - const auto graph = R"IR( - graph(%argument_1.1 : Tensor): - %2 : int = prim::Constant[value=2]() - %2.1 : int = prim::Constant[value=1]() - %3 : Tensor[] = aten::split(%argument_1.1, %2, %2.1) - %4 : Tensor, %5 : Tensor, %6 : Tensor = prim::ListUnpack(%3) - return (%4, %5, %6))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, &*g); - - auto in = at::randint(1, 10, {1, 5, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenSplitAndAddConvertsCorrectly) { - const auto graph = R"IR( - graph(%argument_1.1 : Tensor): - %2 : int = 
prim::Constant[value=2]() - %2.1 : int = prim::Constant[value=1]() - %3 : Tensor[] = aten::split(%argument_1.1, %2, %2.1) - %4 : Tensor, %5 : Tensor = prim::ListUnpack(%3) - %6 : Tensor = aten::add(%4, %5, %2.1) - return (%6))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, &*g); - - auto in = at::randint(1, 10, {1, 4, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenSplitNegativeDimsConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : int = prim::Constant[value=1]() - %n1 : int = prim::Constant[value=-1]() - %3 : Tensor[] = aten::split(%x.1, %2, %n1) - %4 : Tensor, %5 : Tensor, %6 : Tensor, %7 : Tensor = prim::ListUnpack(%3) - return (%4, %5, %6, %7))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenMaskedFillZerosConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %44 : Device = prim::Constant[value="cuda"]() - %8 : bool = prim::Constant[value=0]() - %7 : None = prim::Constant() - %f32_dtype: int = prim::Constant[value=11]() - %1 : int = prim::Constant[value=0]() # bert.py:5:26 - %2 : int = prim::Constant[value=1]() # bert.py:5:32 - %33 : int = prim::Constant[value=2]() # bert.py:6:31 - %3 : int[] = prim::ListConstruct(%1, %1, %2) - %4 : int[] = prim::ListConstruct(%2, %2, %1) - %5 : int[][] = prim::ListConstruct(%3, %4) - %9 : Tensor = aten::tensor(%5, %f32_dtype, %7, %8) # bert.py:5:11 - %mask.1 : Tensor = aten::to(%9, %44, %7, %8, %8) # bert.py:5:11 - %mask.2 : Tensor = trt::const(%mask.1) - %34 : Tensor = aten::masked_fill(%x.1, %mask.1, %33) # bert.py:6:11 - return (%34, %mask.2))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, &*g); - - auto in = at::zeros({1, 2, 3}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - torch_tensorrt::core::lowering::passes::RemoveNOPs(g); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); -} - -TEST(Converters, ATenMaskedFillMixedTypesFloatIntConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, %x.2 : Tensor): - %val : float = prim::Constant[value=4.0]() - %out : Tensor = 
aten::masked_fill(%x.1, %x.2, %val) - return (%out))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, &*g); - - // Input is a float tensor, filled with an int --> expecting float tensor out - auto in1 = at::rand({2, 3, 5, 7}, {at::kCUDA}).to(torch::kFloat32); - auto in2 = (2 * at::rand({2, 3, 5, 7}, {at::kCUDA})).to(torch::kBool); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, in2}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, in2}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); - - // Ensure data types match in outputs - ASSERT_TRUE(jit_results[0].dtype() == trt_results[0].dtype()); -} - -TEST(Converters, ATenMaskedFillMixedTypesIntFloatConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, %x.2 : Tensor): - %val : int = prim::Constant[value=4]() - %out : Tensor = aten::masked_fill(%x.1, %x.2, %val) - return (%out))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, &*g); - - // Input is an integer tensor, filled with a float --> expecting integer tensor out - auto in1 = at::rand({1, 3, 5, 7}, {at::kCUDA}).to(torch::kInt32); - auto in2 = (2 * at::rand({1, 3, 5, 7}, {at::kCUDA})).to(torch::kBool); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, in2}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, in2}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); - - // Ensure data types match in outputs - ASSERT_TRUE(jit_results[0].dtype() == trt_results[0].dtype()); -} - -TEST(Converters, ATenIndexTensorOneIndiceConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index : Tensor): - %18 : Tensor?[] = prim::ListConstruct(%index) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {5, 10}, {at::kCUDA}); - auto in2 = at::full({2}, 4, {at::kCUDA}); - auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA); - auto in2_trt = at::full({2}, 4, {options}); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, in2}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, in2_trt}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); -} - -TEST(Converters, ATenIndexTensorFullIndicesConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor, - %index1 : Tensor, - %index2 : Tensor): - %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %index2) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA}); - auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong); - 
auto index1 = at::tensor({1, 3, 4, 6}, {at::kCUDA}).to(torch::kLong); - auto index2 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong); - auto index0_trt = index0.to(torch::kInt32); - auto index1_trt = index1.to(torch::kInt32); - auto index2_trt = index2.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1, index2}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt, index2_trt}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); -} - -TEST(Converters, ATenIndexTensorRepeatedFullIndicesConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor, - %index1 : Tensor, - %index2 : Tensor): - %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %index2) - %19 : Tensor = aten::index(%x.1, %18) - %20 : Tensor = aten::index(%x.1, %18) - return (%19, %20))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA}); - auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong); - auto index1 = at::tensor({1, 3, 4, 6}, {at::kCUDA}).to(torch::kLong); - auto index2 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong); - auto index0_trt = index0.to(torch::kInt32); - auto index1_trt = index1.to(torch::kInt32); - auto index2_trt = index2.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1, index2}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt, index2_trt}); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[1], trt_results[1], 2e-6)); -} - -TEST(Converters, ATenIndexTensorIdx0Idx1NoneConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor, - %index1 : Tensor): - %5 : NoneType = prim::Constant() - %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %5) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA}); - auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong); - auto index1 = at::tensor({1, 3, 4, 6}, {at::kCUDA}).to(torch::kLong); - auto index0_trt = index0.to(torch::kInt32); - auto index1_trt = index1.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt}); - LOG_DEBUG(trt_results); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); -} - -TEST(Converters, ATenIndexTensorIdx0NoneIdx1ConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor, - %index1 : 
Tensor): - %5 : NoneType = prim::Constant() - %18 : Tensor?[] = prim::ListConstruct(%index0, %5, %index1) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA}); - auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong); - auto index1 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong); - auto index0_trt = index0.to(torch::kInt32); - auto index1_trt = index1.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); -} - -TEST(Converters, ATenIndexTensorNoneIdx0Idx1ConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor, - %index1 : Tensor): - %5 : NoneType = prim::Constant() - %18 : Tensor?[] = prim::ListConstruct(%5, %index0, %index1) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {5, 10, 4}, {at::kCUDA}); - auto index0 = at::tensor({0, 1, 2, 3}, {at::kCUDA}).to(torch::kLong); - auto index1 = at::tensor({3, 2, 1, 0}, {at::kCUDA}).to(torch::kLong); - auto index0_trt = index0.to(torch::kInt32); - auto index1_trt = index1.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt}); - - ASSERT_TRUE( - torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); -} - -TEST(Converters, ATenIndexTensorIdxsNoneConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor, - %index1 : Tensor, - %index2 : Tensor): - %5 : NoneType = prim::Constant() - %18 : Tensor?[] = prim::ListConstruct(%index0, %index1, %index2, %5) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {4, 8, 8, 4}, {at::kCUDA}); - auto index0 = at::full({4, 13, 1}, 1, {at::kCUDA}).to(torch::kLong); - auto index1 = at::full({4, 13, 1}, 2, {at::kCUDA}).to(torch::kLong); - auto index2 = at::full({4, 13, 1}, 3, {at::kCUDA}).to(torch::kLong); - auto index0_trt = index0.to(torch::kInt32); - auto index1_trt = index1.to(torch::kInt32); - auto index2_trt = index2.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0, index1, index2}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt, index1_trt, index2_trt}); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, 
ATenIndexTensorNoneIdx1ConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor, - %index0 : Tensor): - %5 : NoneType = prim::Constant() - %18 : Tensor?[] = prim::ListConstruct(%5, %index0) - %19 : Tensor = aten::index(%x.1, %18) - return (%19))IR"; - - auto g = std::make_shared(); - torch::jit::parseIR(graph, g.get()); - - auto in1 = at::randint(1, 10, {1, 3, 480, 928}, {at::kCUDA}); - auto index0 = at::tensor({2, 1, 0}, {at::kCUDA}).to(torch::kLong); - - auto index0_trt = index0.to(torch::kInt32); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in1, index0}); - - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in1, index0_trt}); - - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6)); -} - -TEST(Converters, ATenUnbindConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : int = prim::Constant[value=0]() - %3 : Tensor[] = aten::unbind(%x.1, %2) - %o1.1 : Tensor, %o2.1 : Tensor = prim::ListUnpack(%3) - return (%o1.1, %o2.1))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {2, 3, 4, 4}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i]; - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenUnbindNegativeAxisConvertsCorrectly) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : int = prim::Constant[value=-1]() - %3 : Tensor[] = aten::unbind(%x.1, %2) - %o1.1 : Tensor, %o2.1 : Tensor = prim::ListUnpack(%3) - return (%o1.1, %o2.1))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto in = at::randint(1, 10, {5, 2}, {at::kCUDA}); - - auto jit_in = at::clone(in); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in}); - - auto trt_in = at::clone(in); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i]; - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ATenUnbindEvaluatedTensor) { - const auto graph = R"IR( - graph(%x.1 : Tensor): - %2 : None = prim::Constant() - %3 : int[] = aten::size(%x.1) - %z.1 : Tensor = aten::zeros(%3, %2, %2, %2, %2) - %5 : int = prim::Constant[value=-1]() - %6 : Tensor[] = aten::unbind(%z.1, %5) - %o1.1 : Tensor, %o2.1 : Tensor = prim::ListUnpack(%6) - return (%o1.1, %o2.1))IR"; - - auto in = at::randint(1, 10, {2}, {at::kCUDA}); - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = 
trt_results[i]; - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i].cuda(), trt, 2e-6)); - } -} - -TEST(Converters, ScatterValueConvertsCorrectly) { - const auto graph = R"IR( - graph(%data : Tensor, - %index.1 : Tensor): - %value : int = prim::Constant[value=100]() - %dim : int = prim::Constant[value=1]() - %5 : NoneType = prim::Constant() - %6 : bool = prim::Constant[value=0]() - %7 : int = prim::Constant[value=4]() - %index : Tensor = aten::to(%index.1, %7, %6, %6, %5) - %10 : Tensor = aten::scatter(%data, %dim, %index, %value) - return (%10))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto index = at::randint(0, 5, {2, 2}, {at::kCUDA}); - auto data = at::randn({5, 5}, {at::kCUDA}); - - auto jit_index = at::clone(index); - auto jit_data = at::clone(data); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_data, jit_index}); - - auto trt_index = at::clone(index); - auto trt_data = at::clone(data); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_data, trt_index}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, ScatterSrcConvertsCorrectly) { - const auto graph = R"IR( - graph(%data : Tensor, - %src : Tensor, - %index.1 : Tensor): - %dim : int = prim::Constant[value=1]() - %5 : NoneType = prim::Constant() - %6 : bool = prim::Constant[value=0]() - %7 : int = prim::Constant[value=4]() - %index : Tensor = aten::to(%index.1, %7, %6, %6, %5) - %10 : Tensor = aten::scatter(%data, %dim, %index, %src) - return (%10))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto index = at::randint(0, 4, {2, 2}, {at::kCUDA}); - auto data = at::randn({5, 5}, {at::kCUDA}); - auto src = at::randn({2, 2}, {at::kCUDA}); - - auto jit_index = at::clone(index); - auto jit_data = at::clone(data); - auto jit_src = at::clone(src); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_data, jit_src, jit_index}); - - auto trt_index = at::clone(index); - auto trt_data = at::clone(data); - auto trt_src = at::clone(src); - auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_data, trt_src, trt_index}); - - for (size_t i = 0; i < jit_results.size(); i++) { - auto trt = trt_results[i].reshape(jit_results[i].sizes()); - ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6)); - } -} - -TEST(Converters, WhereConvertsCorrectly) { - const auto graph = R"IR( - graph(%condition : Tensor, - %x : Tensor, - %y : Tensor): - %out : Tensor = aten::where(%condition, %x, %y) - return (%out))IR"; - - auto g = std::make_shared(); - - torch::jit::parseIR(graph, g.get()); - - auto condition = at::randint(0, 2, {5, 5}, {at::kCUDA}).to(torch::kBool); - auto x = at::randn({5, 5}, {at::kCUDA}); - auto y = at::randn({5, 5}, {at::kCUDA}); - - auto jit_condition = at::clone(condition); - auto jit_x = at::clone(x); - auto jit_y = at::clone(y); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {}); - auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_condition, jit_x, jit_y}); - - auto trt_condition = at::clone(condition); - auto trt_x = at::clone(x); - auto trt_y 
= at::clone(y);
-  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_condition, trt_x, trt_y});
-
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
-}
-
-TEST(Converters, WhereConvertsMismatchedShapesCorrectly) {
-  const auto graph = R"IR(
-      graph(%condition : Tensor,
-            %x : Tensor,
-            %y : Tensor):
-        %out : Tensor = aten::where(%condition, %x, %y)
-        return (%out))IR";
-
-  auto g = std::make_shared<torch::jit::Graph>();
-
-  torch::jit::parseIR(graph, g.get());
-
-  // As per Torch behavior, the input Tensors are expected to be broadcasted
-  // along their respective dimension in the largest-rank Tensor provided
-  auto condition = at::randint(0, 2, {7, 5}, {at::kCUDA}).to(torch::kBool);
-  auto x = at::randn({2, 7, 5}, {at::kCUDA});
-  auto y = at::randn({5}, {at::kCUDA});
-
-  auto jit_condition = at::clone(condition);
-  auto jit_x = at::clone(x);
-  auto jit_y = at::clone(y);
-  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
-  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_condition, jit_x, jit_y});
-
-  auto trt_condition = at::clone(condition);
-  auto trt_x = at::clone(x);
-  auto trt_y = at::clone(y);
-  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_condition, trt_x, trt_y});
-
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
-}
diff --git a/tests/core/conversion/converters/test_slice.cpp b/tests/core/conversion/converters/test_slice.cpp
new file mode 100644
index 0000000000..83ba879291
--- /dev/null
+++ b/tests/core/conversion/converters/test_slice.cpp
@@ -0,0 +1,332 @@
+#include <torch/torch.h>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, ATenSliceConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %3 : int = prim::Constant[value=2]()
+        %4 : int = prim::Constant[value=4]()
+        %5 : int = prim::Constant[value=1]()
+        %6 : int = prim::Constant[value=0]()
+        %7 : Tensor = aten::select(%x.1, %6, %6)
+        %8 : Tensor = aten::select(%7, %6, %5)
+        %9 : Tensor = aten::slice(%8, %6, %5, %4, %3)
+        %10 : Tensor = aten::slice(%9, %5, %2, %2, %5)
+        return (%10))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {1, 3, 5, 5}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
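+// The negative-index cases below follow Python slicing semantics: a negative
+// start or end counts from the back of the dimension (start=-2 on a dim of
+// size 6 begins at index 4), and an end of 9223372036854775807 (INT64_MAX) is
+// how TorchScript encodes an omitted end, i.e. slice through the last element.
+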
+TEST(Converters, ATenSliceNegStartIndexConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : int = prim::Constant[value=1]()
+        %3 : int = prim::Constant[value=9223372036854775807]()
+        %4 : int = prim::Constant[value=-2]()
+        %5 : int = prim::Constant[value=0]()
+        %6 : Tensor = aten::slice(%x.1, %5, %4, %3, %2)
+        %7 : Tensor = aten::slice(%6, %2, %5, %3, %2)
+        return (%7))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {6, 3}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceNegEndIndexConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : int = prim::Constant[value=3]()
+        %3 : int = prim::Constant[value=9223372036854775807]()
+        %4 : int = prim::Constant[value=2]()
+        %5 : int = prim::Constant[value=-3]()
+        %6 : int = prim::Constant[value=1]()
+        %7 : int = prim::Constant[value=-2]()
+        %8 : int = prim::Constant[value=0]()
+        %9 : Tensor = aten::slice(%x.1, %8, %8, %7, %6)
+        %10 : Tensor = aten::slice(%9, %6, %8, %5, %6)
+        %11 : Tensor = aten::slice(%10, %4, %8, %3, %6)
+        %12 : Tensor = aten::slice(%11, %2, %8, %3, %6)
+        return (%12))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {6, 5, 3, 3}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceListConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x : Tensor):
+        %1 : NoneType = prim::Constant()
+        %2 : int = prim::Constant[value=2]()
+        %3 : int = prim::Constant[value=1]()
+        %4 : int = prim::Constant[value=3]()
+        %list : Tensor[] = aten::unbind(%x, %4)
+        %slice : Tensor[] = aten::slice(%list, %1, %2, %3)
+        %out.1 : Tensor, %out.2 : Tensor = prim::ListUnpack(%slice)
+        return (%out.1, %out.2))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in_x = at::randint(1, 10, {6, 5, 3, 3}, {at::kCUDA});
+
+  auto jit_in_x = at::clone(in_x);
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in_x});
+
+  auto trt_in_x = at::clone(in_x);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in_x});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
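+// The remaining cases compile the same kind of graphs against dynamic input
+// ranges. RunGraphEngineDynamic builds the engine with dynamic shapes enabled;
+// judging by the inline comments, its final argument selects whether the batch
+// dimension is the dynamic one.
+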
+TEST(Converters, ATenSliceDynamicBatchConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %dim : int = prim::Constant[value=0]()
+        %start : int = prim::Constant[value=1]()
+        %end : int = prim::Constant[value=15]()
+        %step : int = prim::Constant[value=2]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in batch
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceDynamicBatchLargeEndConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %dim : int = prim::Constant[value=0]()
+        %start : int = prim::Constant[value=1]()
+        %end : int = prim::Constant[value=9223372036854775807]()
+        %step : int = prim::Constant[value=2]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in batch
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceDynamicNegStartBatchConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %dim : int = prim::Constant[value=0]()
+        %start : int = prim::Constant[value=-15]()
+        %end : int = prim::Constant[value=15]()
+        %step : int = prim::Constant[value=2]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in batch
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceDynamicNegEndBatchConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %dim : int = prim::Constant[value=0]()
+        %start : int = prim::Constant[value=1]()
+        %end : int = prim::Constant[value=-2]()
+        %step : int = prim::Constant[value=3]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in batch
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
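+// A None start or end falls back to the defaults (0 and the end of the
+// dimension respectively), so only the step should affect the result below.
+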
+        %step : int = prim::Constant[value=3]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in batch
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceDynamicConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %dim : int = prim::Constant[value=1]()
+        %start : int = prim::Constant[value=3]()
+        %end : int = prim::Constant[value=32]()
+        %step : int = prim::Constant[value=3]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in dim 1, slice in dim 1
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, false);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
+
+TEST(Converters, ATenSliceDynamic2ConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %dim : int = prim::Constant[value=1]()
+        %start : int = prim::Constant[value=3]()
+        %end : int = prim::Constant[value=17]()
+        %step : int = prim::Constant[value=3]()
+        %9 : Tensor = aten::slice(%x.1, %dim, %start, %end, %step)
+        return (%9))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {16, 32}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  // dynamic shape in batch, slice in dim 1
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {trt_in}, true);
+  auto trt = trt_results[0].reshape(jit_results[0].sizes());
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
+}
\ No newline at end of file
diff --git a/tests/core/conversion/converters/test_split.cpp b/tests/core/conversion/converters/test_split.cpp
new file mode 100644
index 0000000000..87bd5a16e0
--- /dev/null
+++ b/tests/core/conversion/converters/test_split.cpp
@@ -0,0 +1,174 @@
+#include <string>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, ATenSplitSizesInScriptingConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : int[] = prim::Constant[value=[1, 2]]()
+        %3 : int = prim::Constant[value=1]()
+        %4 : Tensor[] = aten::split(%x.1, %2, %3)
+        %x1.1 : Tensor, %x2.1 : Tensor = prim::ListUnpack(%4)
+        return (%x1.1, %x2.1))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenSplitSizesinTracingConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%argument_1.1 : Tensor):
+        %2 : int[] = prim::Constant[value=[1, 2]]()
+        %3 : int = prim::Constant[value=1]()
+        %4 : Tensor[] = aten::split_with_sizes(%argument_1.1, %2, %3)
+        %5 : Tensor, %6 : Tensor = prim::ListUnpack(%4)
+        return (%5, %6))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenSplitFixedConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%argument_1.1 : Tensor):
+        %2 : int = prim::Constant[value=1]()
+        %3 : Tensor[] = aten::split(%argument_1.1, %2, %2)
+        %4 : Tensor, %5 : Tensor, %6 : Tensor = prim::ListUnpack(%3)
+        return (%4, %5, %6))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenSplitFixedHasRemainderConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%argument_1.1 : Tensor):
+        %2 : int = prim::Constant[value=2]()
+        %2.1 : int = prim::Constant[value=1]()
+        %3 : Tensor[] = aten::split(%argument_1.1, %2, %2.1)
+        %4 : Tensor, %5 : Tensor, %6 : Tensor = prim::ListUnpack(%3)
+        return (%4, %5, %6))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, &*g);
+
+  auto in = at::randint(1, 10, {1, 5, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenSplitAndAddConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%argument_1.1 : Tensor):
+        %2 : int = prim::Constant[value=2]()
+        %2.1 : int = prim::Constant[value=1]()
+        %3 : Tensor[] = aten::split(%argument_1.1, %2, %2.1)
+        %4 : Tensor, %5 : Tensor = prim::ListUnpack(%3)
+        %6 : Tensor = aten::add(%4, %5, %2.1)
+        return (%6))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, &*g);
+
+  auto in = at::randint(1, 10, {1, 4, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenSplitNegativeDimsConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : int = prim::Constant[value=1]()
+        %n1 : int = prim::Constant[value=-1]()
+        %3 : Tensor[] = aten::split(%x.1, %2, %n1)
+        %4 : Tensor, %5 : Tensor, %6 : Tensor, %7 : Tensor = prim::ListUnpack(%3)
+        return (%4, %5, %6, %7))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {1, 3, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i].reshape(jit_results[i].sizes());
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
\ No newline at end of file
diff --git a/tests/core/conversion/converters/test_unbind.cpp b/tests/core/conversion/converters/test_unbind.cpp
new file mode 100644
index 0000000000..0062a055bb
--- /dev/null
+++ b/tests/core/conversion/converters/test_unbind.cpp
@@ -0,0 +1,88 @@
+#include <string>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, ATenUnbindConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : int = prim::Constant[value=0]()
+        %3 : Tensor[] = aten::unbind(%x.1, %2)
+        %o1.1 : Tensor, %o2.1 : Tensor = prim::ListUnpack(%3)
+        return (%o1.1, %o2.1))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {2, 3, 4, 4}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i];
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenUnbindNegativeAxisConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : int = prim::Constant[value=-1]()
+        %3 : Tensor[] = aten::unbind(%x.1, %2)
+        %o1.1 : Tensor, %o2.1 : Tensor = prim::ListUnpack(%3)
+        return (%o1.1, %o2.1))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto in = at::randint(1, 10, {5, 2}, {at::kCUDA});
+
+  auto jit_in = at::clone(in);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  auto trt_in = at::clone(in);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i];
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i], trt, 2e-6));
+  }
+}
+
+TEST(Converters, ATenUnbindEvaluatedTensor) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %2 : None = prim::Constant()
+        %3 : int[] = aten::size(%x.1)
+        %z.1 : Tensor = aten::zeros(%3, %2, %2, %2, %2)
+        %5 : int = prim::Constant[value=-1]()
+        %6 : Tensor[] = aten::unbind(%z.1, %5)
+        %o1.1 : Tensor, %o2.1 : Tensor = prim::ListUnpack(%6)
+        return (%o1.1, %o2.1))IR";
+
+  auto in = at::randint(1, 10, {2}, {at::kCUDA});
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+
+  for (size_t i = 0; i < jit_results.size(); i++) {
+    auto trt = trt_results[i];
+    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[i].cuda(), trt, 2e-6));
+  }
+}
\ No newline at end of file
diff --git a/tests/core/conversion/converters/test_unpack.cpp b/tests/core/conversion/converters/test_unpack.cpp
new file mode 100644
index 0000000000..9e540723fa
--- /dev/null
+++ b/tests/core/conversion/converters/test_unpack.cpp
@@ -0,0 +1,243 @@
+#include <string>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, UnpackVarLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::var(%x.1, %6, %5, %4) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackVarKeepDimsLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::var(%x.1, %6, %5, %5) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackVarUnbiasedLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::var(%x.1, %6, %4, %4) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackVarUnbiasedKeepDimsLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::var(%x.1, %6, %4, %5) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackStdLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::std(%x.1, %6, %5, %4) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackStd(g);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackStdKeepDimsLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::std(%x.1, %6, %5, %5) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackStd(g);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackStdUnbiasedLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %6 : int[] = prim::ListConstruct(%3)
+        %7 : Tensor = aten::std(%x.1, %6, %4, %4) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackStd(g);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackStdUnbiasedKeepDimsLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %5 : bool = prim::Constant[value=0]() # test_zeros.py:10:65
+        %4 : bool = prim::Constant[value=1]() # test_zeros.py:10:50
+        %3 : int = prim::Constant[value=0]() # test_zeros.py:10:39
+        %one : int = prim::Constant[value=1]()
+        %6 : int[] = prim::ListConstruct(%3, %one)
+        %7 : Tensor = aten::std(%x.1, %6, %4, %5) # test_zeros.py:10:26
+        return (%7))IR";
+
+  auto in = at::randint(-5, 5, {4, 4, 4}, at::kCUDA);
+
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackStd(g);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, UnpackVarUnbiasedNegAxisLowersCorrectly) {
+  const auto graph = R"IR(
+      graph(%x.1 : Tensor):
+        %37 : bool = prim::Constant[value=1]()
+        %53 : int[] = prim::Constant[value=[-1]]()
+        %69 : Tensor = aten::var(%x.1, %53, %37, %37)
+        return (%69))IR";
+
+  auto in = at::randint(-5, 5, {2, 20, 768}, at::kCUDA).to(at::kFloat);
+
+  auto jit_in = at::clone(in);
+  auto g = std::make_shared<torch::jit::Graph>();
+  torch::jit::parseIR(graph, g.get());
+
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_in});
+
+  in = at::clone(in);
+  torch_tensorrt::core::lowering::passes::UnpackVar(g);
+  torch::jit::EliminateCommonSubexpression(g);
+  params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {jit_in});
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
\ No newline at end of file
diff --git a/tests/core/conversion/converters/test_where.cpp b/tests/core/conversion/converters/test_where.cpp
new file mode 100644
index 0000000000..23482662bf
--- /dev/null
+++ b/tests/core/conversion/converters/test_where.cpp
@@ -0,0 +1,68 @@
+#include <string>
+#include "core/compiler.h"
+#include "core/lowering/passes/passes.h"
+#include "gtest/gtest.h"
+#include "tests/util/util.h"
+#include "torch/csrc/jit/ir/irparser.h"
+
+TEST(Converters, WhereConvertsCorrectly) {
+  const auto graph = R"IR(
+      graph(%condition : Tensor,
+            %x : Tensor,
+            %y : Tensor):
+        %out : Tensor = aten::where(%condition, %x, %y)
+        return (%out))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  auto condition = at::randint(0, 2, {5, 5}, {at::kCUDA}).to(torch::kBool);
+  auto x = at::randn({5, 5}, {at::kCUDA});
+  auto y = at::randn({5, 5}, {at::kCUDA});
+
+  auto jit_condition = at::clone(condition);
+  auto jit_x = at::clone(x);
+  auto jit_y = at::clone(y);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_condition, jit_x, jit_y});
+
+  auto trt_condition = at::clone(condition);
+  auto trt_x = at::clone(x);
+  auto trt_y = at::clone(y);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_condition, trt_x, trt_y});
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
+
+TEST(Converters, WhereConvertsMismatchedShapesCorrectly) {
+  const auto graph = R"IR(
+      graph(%condition : Tensor,
+            %x : Tensor,
+            %y : Tensor):
+        %out : Tensor = aten::where(%condition, %x, %y)
+        return (%out))IR";
+
+  auto g = std::make_shared<torch::jit::Graph>();
+
+  torch::jit::parseIR(graph, g.get());
+
+  // As per Torch behavior, the input Tensors are expected to be broadcasted
+  // along their respective dimension in the largest-rank Tensor provided
+  auto condition = at::randint(0, 2, {7, 5}, {at::kCUDA}).to(torch::kBool);
+  auto x = at::randn({2, 7, 5}, {at::kCUDA});
+  auto y = at::randn({5}, {at::kCUDA});
+
+  auto jit_condition = at::clone(condition);
+  auto jit_x = at::clone(x);
+  auto jit_y = at::clone(y);
+  auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
+  auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {jit_condition, jit_x, jit_y});
+
+  auto trt_condition = at::clone(condition);
+  auto trt_x = at::clone(x);
+  auto trt_y = at::clone(y);
+  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {trt_condition, trt_x, trt_y});
+
+  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
+}
\ No newline at end of file

From 125a0a6f626a83664cc71d76c27736b769c425eb Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Thu, 20 Apr 2023 11:51:54 -0700
Subject: [PATCH 4/9] chore: Fix missing headers in tests

Signed-off-by: Dheeraj Peri

---
 tests/core/conversion/converters/test_masked_fill.cpp | 1 +
 tests/core/conversion/converters/test_reduce.cpp      | 9 +++++++--
 tests/core/conversion/converters/test_unpack.cpp      | 2 ++
 tests/core/conversion/converters/test_where.cpp       | 1 +
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/core/conversion/converters/test_masked_fill.cpp b/tests/core/conversion/converters/test_masked_fill.cpp
index 518b31dc02..2c375463e5 100644
--- a/tests/core/conversion/converters/test_masked_fill.cpp
+++ b/tests/core/conversion/converters/test_masked_fill.cpp
@@ -1,3 +1,4 @@
+#include <torch/torch.h>
 #include <string>
 #include "core/compiler.h"
 #include "core/lowering/passes/passes.h"
diff --git a/tests/core/conversion/converters/test_reduce.cpp b/tests/core/conversion/converters/test_reduce.cpp
index 87ad482a26..3cdb2d3b84 100644
--- a/tests/core/conversion/converters/test_reduce.cpp
+++ b/tests/core/conversion/converters/test_reduce.cpp
@@ -62,7 +62,7 @@ std::string gen_keepdim_graph(const std::string& op) {
         return (%5))IR";
 }
 
-void test_body(const std::string& graph, at::Tensor& in) {
+void test_body(const std::string& graph, at::Tensor& in, bool dynamic = false) {
   auto g = std::make_shared<torch::jit::Graph>();
   torch::jit::parseIR(graph, g.get());
 
@@ -71,7 +71,12 @@ void test_body(const std::string& graph, at::Tensor& in) {
 
   in = at::clone(in);
   params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
-  auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  std::vector<at::Tensor> trt_results;
+  if (dynamic) {
+    trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in});
+  } else {
+    trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in});
+  }
   ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0], 2e-6));
 }
 } // namespace
diff --git a/tests/core/conversion/converters/test_unpack.cpp b/tests/core/conversion/converters/test_unpack.cpp
index 9e540723fa..858462b003 100644
--- a/tests/core/conversion/converters/test_unpack.cpp
+++ b/tests/core/conversion/converters/test_unpack.cpp
@@ -4,6 +4,8 @@
 #include "gtest/gtest.h"
 #include "tests/util/util.h"
 #include "torch/csrc/jit/ir/irparser.h"
+#include "torch/csrc/jit/passes/common_subexpression_elimination.h"
+#include "torch/torch.h"
 
 TEST(Converters, UnpackVarLowersCorrectly) {
   const auto graph = R"IR(
diff --git a/tests/core/conversion/converters/test_where.cpp b/tests/core/conversion/converters/test_where.cpp
index 23482662bf..34b3696582 100644
--- a/tests/core/conversion/converters/test_where.cpp
+++ b/tests/core/conversion/converters/test_where.cpp
@@ -1,3 +1,4 @@
+#include <torch/torch.h>
 #include <string>
 #include "core/compiler.h"
 #include "core/lowering/passes/passes.h"

From f0df3dc5dfb9f9b53e761187efd407cfe0301afa Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Thu, 20 Apr 2023 12:43:14 -0700
Subject: [PATCH 5/9] chore: fix headers

Signed-off-by: Dheeraj Peri

---
 tests/core/conversion/converters/test_index.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/core/conversion/converters/test_index.cpp b/tests/core/conversion/converters/test_index.cpp
index 34e50f2abd..b405d7a436 100644
--- a/tests/core/conversion/converters/test_index.cpp
+++ b/tests/core/conversion/converters/test_index.cpp
@@ -1,3 +1,4 @@
+#include <torch/torch.h>
 #include <string>
 #include "core/compiler.h"
 #include "core/lowering/passes/passes.h"

From e2184bc96a045eb245071dc1e82ccec5cfbc91c6 Mon Sep 17 00:00:00 2001
From: Dheeraj Peri
Date: Thu, 20 Apr 2023 15:03:03 -0700
Subject: [PATCH 6/9] chore: remove aten::all dim tests

Signed-off-by: Dheeraj Peri

---
 .../conversion/converters/test_reduce.cpp | 44 -------------------
 1 file changed, 44 deletions(-)

diff --git a/tests/core/conversion/converters/test_reduce.cpp b/tests/core/conversion/converters/test_reduce.cpp
index 3cdb2d3b84..2e2bb5cbd3 100644
--- a/tests/core/conversion/converters/test_reduce.cpp
+++ b/tests/core/conversion/converters/test_reduce.cpp
@@ -348,47 +348,3 @@ TEST(Converters, ATenAnyDimNegIndexConvertsCorrectly) {
   auto in = at::randint(-2, 2, {2, 32}, at::kCUDA);
   test_body(graph, in);
 }
-
-TEST(Converters, ATenAllDimConvertsCorrectly) {
-  const auto graph = R"IR(
-      graph(%0 : Tensor):
-        %1 : int = prim::Constant[value=-1]()
-        %3 : bool = prim::Constant[value=0]()
-        %5 : Tensor = aten::all(%0, %1, %3)
-        return (%5))IR";
-  auto in = at::randint(0, 2, {64, 2}, at::kCUDA);
-  test_body(graph, in);
-}
-
-TEST(Converters, ATenAllDimKeepDimConvertsCorrectly) {
-  const auto graph = R"IR(
-      graph(%0 : Tensor):
-        %1 : int = prim::Constant[value=0]()
-        %3 : bool = prim::Constant[value=1]()
-        %5 : Tensor = aten::all(%0, %1, %3)
-        return (%5))IR";
-  auto in = at::randint(-2, 2, {2, 32}, at::kCUDA).to(torch::kBool);
-  test_body(graph, in);
-}
-
-TEST(Converters, ATenAllDimAllTrueConvertsCorrectly) {
-  const auto graph = R"IR(
-      graph(%0 : Tensor):
-        %1 : int = prim::Constant[value=1]()
-        %3 : bool = prim::Constant[value=0]()
-        %5 : Tensor = aten::all(%0, %1, %3)
-        return (%5))IR";
-  auto in = at::ones({2, 32}, at::kCUDA);
-  test_body(graph, in);
-}
-
-TEST(Converters, ATenAllDimDynamicConvertsCorrectly) {
-  const auto graph = R"IR(
-      graph(%0 : Tensor):
-        %1 : int = prim::Constant[value=-1]()
-        %3 : bool = prim::Constant[value=0]()
-        %5 : Tensor = aten::all(%0, %1, %3)
-        return (%5))IR";
-  auto in = at::randint(0, 2, {64, 2}, at::kCUDA).to(torch::kHalf);
-  test_body(graph, in, true);
-}

From 2b70a2fd8ddebe4e4bf3cdfae9fe8da1b1cb4171 Mon Sep 17 00:00:00 2001
From: George S <113141689+gs-olive@users.noreply.github.com>
Date: Tue, 2 May 2023 14:50:09 -0700
Subject: [PATCH 7/9] fix: Upgrade to PyTorch 2.0.1 Release Candidate + Other
 improvements (#1857)

---
 .circleci/config.yml                        | 22 ++---
 README.md                                   | 26 +++---
 WORKSPACE                                   | 14 +--
 docker/Dockerfile                           | 89 +++++++++++--------
 docker/README.md                            | 15 +++-
 docker/WORKSPACE.docker                     | 16 ++--
 docker/WORKSPACE.ngc                        |  6 +-
 docker/dist-build.sh                        | 10 ++-
 py/requirements.txt                         |  6 +-
 py/setup.py                                 | 10 ++-
 py/torch_tensorrt/fx/README.md              |  8 +-
 .../passes/test_fuse_permute_linear_trt.py  |  5 --
 py/versions.py                              |  6 +-
 pyproject.toml                              |  5 +-
 .../WORKSPACE.x86_64.release.rhel           |  8 +-
 .../WORKSPACE.x86_64.release.ubuntu         |  8 +-
 16 files changed, 141 insertions(+), 113 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index c3ea57f0f1..76727084e1 100644
---
a/.circleci/config.yml +++ b/.circleci/config.yml @@ -102,7 +102,7 @@ commands: sudo apt-get --purge remove "*nvidia*" install-cudnn: - description: "Install CUDNN 8.5.0" + description: "Install CUDNN 8.8.0" parameters: os: type: string @@ -112,10 +112,10 @@ commands: default: "x86_64" cudnn-version: type: string - default: "8.5.0.96" + default: "8.8.0.121" cuda-version: type: string - default: "cuda11.7" + default: "cuda11.8" steps: - run: name: Install CUDNN @@ -200,7 +200,7 @@ commands: default: "cuda11.8" cudnn-version: type: string - default: "8.5.0.96" + default: "8.8.0.121" trt-version-short: type: string default: "8.6.0" @@ -252,7 +252,7 @@ commands: default: "8.6.0" cudnn-version-long: type: string - default: "8.5.0.96" + default: "8.8.0.121" steps: - run: name: Set up python environment @@ -269,10 +269,10 @@ commands: parameters: torch-build: type: string - default: "2.0.0" + default: "2.0.1" torch-build-index: type: string - default: "https://download.pytorch.org/whl/cu118" + default: "https://download.pytorch.org/whl/test/cu118" steps: - run: name: Install Torch @@ -474,6 +474,7 @@ commands: - run: mkdir -p /tmp/artifacts - run: name: Run core / C++ tests + no_output_timeout: 15m environment: LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH" command: | @@ -1205,10 +1206,10 @@ parameters: # Nightly platform config torch-build: type: string - default: "2.0.0" + default: "2.0.1" torch-build-index: type: string - default: "https://download.pytorch.org/whl/cu118" + default: "https://download.pytorch.org/whl/test/cu118" torch-build-legacy: type: string default: "1.13.1+cu117" @@ -1217,7 +1218,7 @@ parameters: default: "https://download.pytorch.org/whl/cu117" cudnn-version: type: string - default: "8.5.0.96" + default: "8.8.0.121" trt-version-short: type: string default: "8.6.0" @@ -1412,4 +1413,3 @@ workflows: trt-version-short: << pipeline.parameters.trt-version-short >> cudnn-version: << pipeline.parameters.cudnn-version >> python-version: << pipeline.parameters.python-version >> - diff --git a/README.md b/README.md index bc46646d70..40423e4fdc 100644 --- a/README.md +++ b/README.md @@ -31,12 +31,7 @@ In the case of building on top of a custom base container, you first must determ version of the PyTorch C++ ABI. If your source of PyTorch is pytorch.org, likely this is the pre-cxx11-abi in which case you must modify `//docker/dist-build.sh` to not build the C++11 ABI version of Torch-TensorRT. -You can then build the container using: - - -```bash -docker build --build-arg BASE_IMG= -f docker/Dockerfile -t torch_tensorrt:latest . -``` +You can then build the container using the build command in the [docker README](docker/README.md#instructions) If you would like to build outside a docker container, please follow the section [Compiling Torch-TensorRT](#compiling-torch-tensorrt) @@ -73,6 +68,7 @@ import torch_tensorrt ... trt_ts_module = torch_tensorrt.compile(torch_script_module, + # If the inputs to the module are plain Tensors, specify them via the `inputs` argument: inputs = [example_tensor, # Provide example tensor for input shape or... 
torch_tensorrt.Input( # Specify input object with shape and dtype
            min_shape=[1, 3, 224, 224],
@@ -81,6 +77,12 @@ trt_ts_module = torch_tensorrt.compile(torch_script_module,
             # For static size shape=[1, 3, 224, 224]
             dtype=torch.half) # Datatype of input tensor. Allowed options torch.(float|half|int8|int32|bool)
     ],
+
+    # For inputs containing tuples or lists of tensors, use the `input_signature` argument:
+    # Below, we have an input consisting of a Tuple of two Tensors (Tuple[Tensor, Tensor])
+    # input_signature = ( (torch_tensorrt.Input(shape=[1, 3, 224, 224], dtype=torch.half),
+    #                      torch_tensorrt.Input(shape=[1, 3, 224, 224], dtype=torch.half)), ),
+
     enabled_precisions = {torch.half}, # Run with FP16
 )
@@ -114,17 +116,17 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") # save the TRT embedd
 
 The following dependencies are used to verify the test cases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.
 
 - Bazel 5.2.0
-- Libtorch 2.0.0.dev20230103 (built with CUDA 11.7)
-- CUDA 11.7
-- cuDNN 8.5.0
-- TensorRT 8.5.1.7
+- Libtorch 2.0.1 (built with CUDA 11.8)
+- CUDA 11.8
+- cuDNN 8.8.0
+- TensorRT 8.6.0
 
 ## Prebuilt Binaries and Wheel files
 
 Releases: https://github.com/pytorch/TensorRT/releases
 
 ```
-pip install torch-tensorrt==1.2.0 --find-links https://github.com/pytorch/TensorRT/releases/expanded_assets/v1.2.0
+pip install torch-tensorrt
 ```
 
 ## Compiling Torch-TensorRT
@@ -245,7 +247,7 @@ A tarball with the include files and library can then be found in bazel-bin
 ### Running Torch-TensorRT on a JIT Graph
 
 > Make sure to add LibTorch to your LD_LIBRARY_PATH
-> `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/bazel-Torch-TensorRT/external/libtorch/lib`
+> `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/bazel-TensorRT/external/libtorch/lib`
 
 ``` shell
 bazel run //cpp/bin/torchtrtc -- $(realpath <PATH TO GRAPH>) out.ts
diff --git a/WORKSPACE b/WORKSPACE
index be34aab833..5f0c1ffb74 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -51,17 +51,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "292b3f81e7c857fc102be93e2e44c40cdb4d8ef03d98121bc6af434c66e8490b",
+    sha256 = "c5174f18c0866421a5738d389aaea0c02f32a1a5be5f0747dc8dd0d96034c9b0",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"],
+    urls = ["https://download.pytorch.org/libtorch/test/cu118/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
 )
 
 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "f3cbd7e9593f0c64b8671d02a21d562c98b60ef1abf5898c0ee9acfbc5a6b5d2",
+    sha256 = "cc19b398cf435e0e34f347ef90fc11c2a42703998330a9c4a9fb0d2291737df7",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip"],
+    urls = ["https://download.pytorch.org/libtorch/test/cu118/libtorch-shared-with-deps-latest.zip"],
 )
 
 # Download these tarballs manually from the NVIDIA website
@@ -71,10 +71,10 @@ http_archive(
 http_archive(
     name = "cudnn",
     build_file = "@//third_party/cudnn/archive:BUILD",
-    sha256 = "5454a6fd94f008728caae9adad993c4e85ef36302e26bce43bea7d458a5e7b6d",
-    strip_prefix = "cudnn-linux-x86_64-8.5.0.96_cuda11-archive",
+    sha256 = "36fff137153ef73e6ee10bfb07f4381240a86fb9fb78ce372414b528cbab2293",
+    strip_prefix = "cudnn-linux-x86_64-8.8.0.121_cuda11-archive",
     urls = [
-        "https://developer.nvidia.com/compute/cudnn/secure/8.5.0/local_installers/11.7/cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz",
+        "https://developer.download.nvidia.com/compute/cudnn/secure/8.8.0/local_installers/11.8/cudnn-linux-x86_64-8.8.0.121_cuda11-archive.tar.xz",
     ],
 )
diff --git a/docker/Dockerfile b/docker/Dockerfile
index d51a4ee676..b21f29910b 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,40 +1,54 @@
 # Base image starts with CUDA
-ARG BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu20.04
+ARG BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04
 FROM ${BASE_IMG} as base
 
+ARG TENSORRT_VERSION
+RUN test -n "$TENSORRT_VERSION" || (echo "No tensorrt version specified, please use --build-arg TENSORRT_VERSION=x.y.z to specify a version." && exit 1)
+ARG CUDNN_VERSION
+RUN test -n "$CUDNN_VERSION" || (echo "No cudnn version specified, please use --build-arg CUDNN_VERSION=x.y.z to specify a version."
&& exit 1) + +ARG PYTHON_VERSION=3.10 +ENV PYTHON_VERSION=${PYTHON_VERSION} + +ARG USE_CXX11_ABI +ENV USE_CXX11=${USE_CXX11_ABI} +ENV DEBIAN_FRONTEND=noninteractive + # Install basic dependencies RUN apt-get update -RUN DEBIAN_FRONTEND=noninteractive apt install -y build-essential manpages-dev wget zlib1g software-properties-common git -RUN add-apt-repository ppa:deadsnakes/ppa -RUN apt install -y python3.8 python3.8-distutils python3.8-dev -RUN wget https://bootstrap.pypa.io/get-pip.py -RUN ln -s /usr/bin/python3.8 /usr/bin/python -RUN python get-pip.py -RUN pip3 install wheel - -# Install Pytorch -RUN pip3 install torch==2.0.0.dev20230103+cu117 torchvision==0.15.0.dev20230103+cu117 --extra-index-url https://download.pytorch.org/whl/nightly/cu117 - -# Install CUDNN + TensorRT -RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin -RUN mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 -RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub +RUN apt install -y build-essential manpages-dev wget zlib1g software-properties-common git libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev mecab-ipadic-utf8 + +# Install PyEnv and desired Python version +ENV HOME="/root" +ENV PYENV_DIR="$HOME/.pyenv" +ENV PATH="$PYENV_DIR/shims:$PYENV_DIR/bin:$PATH" +RUN wget -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer &&\ + chmod 755 pyenv-installer &&\ + bash pyenv-installer &&\ + eval "$(pyenv init -)" + +RUN pyenv install -v ${PYTHON_VERSION} +RUN pyenv global ${PYTHON_VERSION} + +# Install CUDNN + TensorRT + dependencies +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin +RUN mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600 +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/7fa2af80.pub RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC -RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" +RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" RUN apt-get update -RUN apt-get install -y libcudnn8=8.5.0* libcudnn8-dev=8.5.0* +RUN apt-get install -y libcudnn8=${CUDNN_VERSION}* libcudnn8-dev=${CUDNN_VERSION}* -RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub -RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub +RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" RUN apt-get update -RUN apt-get install -y libnvinfer8=8.5.1* libnvinfer-plugin8=8.5.1* libnvinfer-dev=8.5.1* libnvinfer-plugin-dev=8.5.1* libnvonnxparsers8=8.5.1-1* libnvonnxparsers-dev=8.5.1-1* libnvparsers8=8.5.1-1* libnvparsers-dev=8.5.1-1* +RUN apt-get install -y libnvinfer8=${TENSORRT_VERSION}* libnvinfer-plugin8=${TENSORRT_VERSION}* libnvinfer-dev=${TENSORRT_VERSION}* libnvinfer-plugin-dev=${TENSORRT_VERSION}* 
libnvonnxparsers8=${TENSORRT_VERSION}-1* libnvonnxparsers-dev=${TENSORRT_VERSION}-1* libnvparsers8=${TENSORRT_VERSION}-1* libnvparsers-dev=${TENSORRT_VERSION}-1*
 
-# Setup Bazel
-ARG BAZEL_VERSION=5.2.0
-RUN wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-x86_64 -O /usr/bin/bazel \
-    && chmod a+x /usr/bin/bazel
+# Setup Bazel via Bazelisk
+RUN wget -q https://github.com/bazelbuild/bazelisk/releases/download/v1.16.0/bazelisk-linux-amd64 -O /usr/bin/bazel &&\
+    chmod a+x /usr/bin/bazel
 
 # Build Torch-TensorRT in an auxiliary container
 FROM base as torch-tensorrt-builder-base
@@ -43,10 +57,12 @@ ARG ARCH="x86_64"
 ARG TARGETARCH="amd64"
 
 RUN apt-get install -y python3-setuptools
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
-RUN apt-get update
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub
 
-RUN apt-get update && apt-get install -y --no-install-recommends locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8
+RUN apt-get update &&\
+    apt-get install -y --no-install-recommends locales ninja-build &&\
+    rm -rf /var/lib/apt/lists/* &&\
+    locale-gen en_US.UTF-8
 
 FROM torch-tensorrt-builder-base as torch-tensorrt-builder
 
@@ -54,8 +70,11 @@ COPY . /workspace/torch_tensorrt/src
 WORKDIR /workspace/torch_tensorrt/src
 RUN cp ./docker/WORKSPACE.docker WORKSPACE
 
+# Symlink the path pyenv is using for python with the /opt directory for package sourcing
+RUN ln -s "`pyenv which python | xargs dirname | xargs dirname`/lib/python$PYTHON_VERSION/site-packages" "/opt/python3"
+
 # This script builds both libtorchtrt bin/lib/include tarball and the Python wheel, in dist/
-RUN ./docker/dist-build.sh
+RUN bash ./docker/dist-build.sh
 
 # Copy and install Torch-TRT into the main container
 FROM base as torch-tensorrt
@@ -64,13 +83,13 @@ COPY . /opt/torch_tensorrt
 COPY --from=torch-tensorrt-builder /workspace/torch_tensorrt/src/py/dist/ .
 
 RUN cp /opt/torch_tensorrt/docker/WORKSPACE.docker /opt/torch_tensorrt/WORKSPACE
-RUN pip3 install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
-
-# Install native tensorrt python package required by torch_tensorrt whl file
-RUN pip install tensorrt==8.5.1.7
+RUN pip install -r /opt/torch_tensorrt/py/requirements.txt
+RUN pip install tensorrt==${TENSORRT_VERSION}.*
+RUN pip install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
 
 WORKDIR /opt/torch_tensorrt
-ENV LD_LIBRARY_PATH /usr/local/lib/python3.8/dist-packages/torch/lib:/usr/local/lib/python3.8/dist-packages/torch_tensorrt/lib:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
-ENV PATH /usr/local/lib/python3.8/dist-packages/torch_tensorrt/bin:${PATH}
+
+ENV LD_LIBRARY_PATH /opt/python3/site-packages/torch/lib:/opt/python3/site-packages/torch_tensorrt/lib:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
+ENV PATH /opt/python3/site-packages/torch_tensorrt/bin:${PATH}
 
 CMD /bin/bash
diff --git a/docker/README.md b/docker/README.md
index 9ca93749a5..34253f20b5 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -2,10 +2,14 @@
 
 * Use `Dockerfile` to build a container which provides the exact development environment that our master branch is usually tested against.
 
-* `Dockerfile` currently uses the exact library versions (Torch, CUDA, CUDNN, TensorRT) listed in dependencies to build Torch-TensorRT.
+* The `Dockerfile` currently uses Bazelisk to select the Bazel version, and uses the exact library versions of Torch and CUDA listed in dependencies.
+  * The desired versions of CUDNN and TensorRT must be specified as build-args, with major, minor, and patch versions as in: `--build-arg TENSORRT_VERSION=a.b.c --build-arg CUDNN_VERSION=x.y.z`
+  * [**Optional**] The desired base image can be changed by explicitly setting a base image, as in `--build-arg BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04`, though this is optional
+  * [**Optional**] Additionally, the desired Python version can be changed by explicitly setting a version, as in `--build-arg PYTHON_VERSION=3.10`, though this is optional as well.
 
 * This `Dockerfile` installs `pre-cxx11-abi` versions of Pytorch and builds Torch-TRT using `pre-cxx11-abi` libtorch as well.
 
-Note: To install `cxx11_abi` version of Torch-TensorRT, enable `USE_CXX11=1` flag so that `dist-build.sh` can build it accordingly.
+
+Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch-TRT. If you are using a workflow that requires a build of PyTorch on the CXX11 ABI (e.g. using the PyTorch NGC containers as a base image), add the Docker build argument: `--build-arg USE_CXX11_ABI=1`
 
 ### Dependencies
 
@@ -13,11 +17,14 @@ Note: To install `cxx11_abi` version of Torch-TensorRT, enable `USE_CXX11=1` fla
 
 ### Instructions
 
+- The example below uses CUDNN 8.8.0 and TensorRT 8.6.0
+- See dependencies for a list of current default dependencies.
+
 > From root of Torch-TensorRT repo
 
 Build:
 ```
-DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t torch_tensorrt:latest .
+DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6.0 --build-arg CUDNN_VERSION=8.8.0 -f docker/Dockerfile -t torch_tensorrt:latest .
 ```
 
 Run:
@@ -38,4 +45,4 @@ bazel test //tests/core/conversion/converters:test_activation --compilation_mode
 
 ### Pytorch NGC containers
 
-We also ship Torch-TensorRT in Pytorch NGC containers . Release notes for these containers can be found here. Check out `release/ngc/23.XX` branch of Torch-TensorRT for source code that gets shipped with `23.XX` version of Pytorch NGC container.
\ No newline at end of file
+We also ship Torch-TensorRT in Pytorch NGC containers. Release notes for these containers can be found here. Check out `release/ngc/23.XX` branch of Torch-TensorRT for source code that gets shipped with `23.XX` version of Pytorch NGC container.
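As a consolidated reference for the build arguments described in the README changes above, a fully-specified invocation might look like the following sketch; the base image, Python version, and `docker run` flags shown here are illustrative assumptions rather than values fixed by this patch:

```bash
# Build with the required CUDNN/TensorRT build-args plus the optional overrides
DOCKER_BUILDKIT=1 docker build \
  --build-arg TENSORRT_VERSION=8.6.0 \
  --build-arg CUDNN_VERSION=8.8.0 \
  --build-arg BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04 \
  --build-arg PYTHON_VERSION=3.10 \
  -f docker/Dockerfile -t torch_tensorrt:latest .

# Launch the resulting image with GPU access (run flags are illustrative)
docker run --gpus all -it --rm torch_tensorrt:latest /bin/bash
```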
diff --git a/docker/WORKSPACE.docker b/docker/WORKSPACE.docker index 8a521ce5a1..dd5ff3cd9a 100755 --- a/docker/WORKSPACE.docker +++ b/docker/WORKSPACE.docker @@ -48,20 +48,16 @@ new_local_repository( # Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs) ############################################################################################################# -http_archive( +new_local_repository( name = "libtorch", - build_file = "@//third_party/libtorch:BUILD", - sha256 = "59b8b5e1954a86d50b79c13f06398d385b200da13e37a08ecf31d3c62e5ca127", - strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-2.0.0.dev20230103%2Bcu117.zip"], + path = "/opt/python3/site-packages/torch/", + build_file = "third_party/libtorch/BUILD" ) -http_archive( +new_local_repository( name = "libtorch_pre_cxx11_abi", - build_file = "@//third_party/libtorch:BUILD", - sha256 = "e260fc7476be89d1650953e8643e9f7363845f5a52de4bab87ac0e619c1f6ad4", - strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-2.0.0.dev20230103%2Bcu117.zip"], + path = "/opt/python3/site-packages/torch/", + build_file = "third_party/libtorch/BUILD" ) #################################################################################### diff --git a/docker/WORKSPACE.ngc b/docker/WORKSPACE.ngc index 7fd4aad4e6..3e570c63f1 100755 --- a/docker/WORKSPACE.ngc +++ b/docker/WORKSPACE.ngc @@ -33,7 +33,7 @@ git_repository( # This is currently used in pytorch NGC container CI testing. local_repository( name = "torch_tensorrt", - path = "/usr/local/lib/python3.8/dist-packages/torch_tensorrt" + path = "/opt/python3/site-packages/torch_tensorrt/" ) # CUDA should be installed on the system locally @@ -55,13 +55,13 @@ new_local_repository( new_local_repository( name = "libtorch", - path = "/usr/local/lib/python3.8/dist-packages/torch", + path = "/opt/python3/site-packages/torch/", build_file = "third_party/libtorch/BUILD" ) new_local_repository( name = "libtorch_pre_cxx11_abi", - path = "/usr/local/lib/python3.8/dist-packages/torch", + path = "/opt/python3/site-packages/torch/", build_file = "third_party/libtorch/BUILD" ) diff --git a/docker/dist-build.sh b/docker/dist-build.sh index cf209258be..ed5b271825 100755 --- a/docker/dist-build.sh +++ b/docker/dist-build.sh @@ -10,9 +10,13 @@ fi cd ${TOP_DIR} \ && mkdir -p dist && cd py \ - && pip install -r requirements.txt \ - && MAX_JOBS=1 LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8 \ - ${BUILD_CMD} $* || exit 1 + && pip install -r requirements.txt + +# Symlink the path pyenv is using for python with the /opt directory for package sourcing +ln -s "`pyenv which python | xargs dirname | xargs dirname`/lib/python$PYTHON_VERSION/site-packages" "/opt/python3" + +# Build Torch-TRT +MAX_JOBS=1 LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8 ${BUILD_CMD} $* || exit 1 pip3 install ipywidgets --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org jupyter nbextension enable --py widgetsnbextension diff --git a/py/requirements.txt b/py/requirements.txt index c06e1eeb58..c40247629b 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -1,7 +1,7 @@ numpy pybind11==2.6.2 ---extra-index-url https://download.pytorch.org/whl/nightly/cu118 -torch==2.0.0 -torchvision==0.15.1 +--extra-index-url https://download.pytorch.org/whl/test/cu118 +torch==2.0.1 +torchvision==0.15.2 --extra-index-url 
https://pypi.ngc.nvidia.com tensorrt==8.6.0 diff --git a/py/setup.py b/py/setup.py index 7e501b667c..d88ae4fc04 100644 --- a/py/setup.py +++ b/py/setup.py @@ -31,6 +31,8 @@ FX_ONLY = False +LEGACY = False + RELEASE = False CI_RELEASE = False @@ -48,6 +50,10 @@ def get_git_revision_short_hash() -> str: FX_ONLY = True sys.argv.remove("--fx-only") +if "--legacy" in sys.argv: + LEGACY = True + sys.argv.remove("--legacy") + if "--release" not in sys.argv: __version__ = __version__ + "+" + get_git_revision_short_hash() else: @@ -380,7 +386,7 @@ def run(self): long_description=long_description, ext_modules=ext_modules, install_requires=[ - "torch==2.0.0", + "torch >=2.0.1,<2.1" if not LEGACY else "torch >=1.13.0,<2.0", ], setup_requires=[], cmdclass={ @@ -409,7 +415,7 @@ def run(self): "Topic :: Software Development", "Topic :: Software Development :: Libraries", ], - python_requires=">=3.7", + python_requires=">=3.8", include_package_data=True, package_data={ "torch_tensorrt": [ diff --git a/py/torch_tensorrt/fx/README.md b/py/torch_tensorrt/fx/README.md index d53f43a1d4..3e7cec9c44 100644 --- a/py/torch_tensorrt/fx/README.md +++ b/py/torch_tensorrt/fx/README.md @@ -8,14 +8,14 @@ FX2TRT is merged as FX module in Torch-TensorRT ``` $ conda create --name python_env python=3.8 $ conda activate python_env - # Recommend to install PyTorch 1.12 and later - $ conda install pytorch torchvision torchtext cudatoolkit=11.3 -c pytorch-nightly + # Recommend to install PyTorch 2.0 and later + $ conda install pytorch torchvision torchtext cudatoolkit=11.8 -c pytorch-nightly # Install TensorRT python package $ pip3 install nvidia-pyindex - $ pip3 install tensorrt==8.5.1.7 + $ pip3 install tensorrt==8.6.0.12 $ git clone https://github.com/pytorch/TensorRT.git $ cd TensorRT/py && python setup.py install --fx-only && cd .. - $ pyton -c "import torch_tensorrt.fx" + $ python -c "import torch_tensorrt.fx" # Test an example by $ python py/torch_tensorrt/fx/example/lower_example.py ``` diff --git a/py/torch_tensorrt/fx/test/passes/test_fuse_permute_linear_trt.py b/py/torch_tensorrt/fx/test/passes/test_fuse_permute_linear_trt.py index eeeb9c9eeb..36420375f8 100644 --- a/py/torch_tensorrt/fx/test/passes/test_fuse_permute_linear_trt.py +++ b/py/torch_tensorrt/fx/test/passes/test_fuse_permute_linear_trt.py @@ -54,11 +54,6 @@ def forward(self, x): apply_passes=[fuse_permute_linear], ) - # TODO: The following test has been disabled due to a bug in TRT 8.5.1.7 - # with self.linear2. 
Issue : https://github.com/pytorch/TensorRT/issues/1444 - @unittest.skip( - reason="test_multi_fuse_permute_linear has been disabled due to a bug in TRT 8.5.1.7 https://github.com/pytorch/TensorRT/issues/1444" - ) def test_multi_fuse_permute_linear(self): """ Fusion when permute output is shared by multiple linears diff --git a/py/versions.py b/py/versions.py index 3633ad114f..bc6fdb8959 100644 --- a/py/versions.py +++ b/py/versions.py @@ -1,4 +1,4 @@ __version__ = "1.4.0.dev0" -__cuda_version__ = "11.7" -__cudnn_version__ = "8.5" -__tensorrt_version__ = "8.5" +__cuda_version__ = "11.8" +__cudnn_version__ = "8.8" +__tensorrt_version__ = "8.6" diff --git a/pyproject.toml b/pyproject.toml index bb07e2f7cf..3b54e05791 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,7 @@ requires = [ "cffi", "typing_extensions", "future", - "nvidia-pyindex", - "nvidia-tensorrt==8.4.3.1" + "tensorrt >=8.6,<8.7" ] # Use legacy backend to import local packages in setup.py @@ -20,7 +19,7 @@ requires = [ [tool.black] # Uncomment if pyproject.toml worked fine to ensure consistency with flake8 # line-length = 120 -target-versions = ["py37", "py38", "py39", "py310"] +target-versions = ["py38", "py39", "py310"] force-exclude = """ elu_converter/setup.py """ diff --git a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel index 01d4d14bfe..6e1e688b36 100644 --- a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel +++ b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel @@ -56,17 +56,17 @@ new_local_repository( http_archive( name = "libtorch", build_file = "@//third_party/libtorch:BUILD", - sha256 = "292b3f81e7c857fc102be93e2e44c40cdb4d8ef03d98121bc6af434c66e8490b", + sha256 = "c5174f18c0866421a5738d389aaea0c02f32a1a5be5f0747dc8dd0d96034c9b0", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"], + urls = ["https://download.pytorch.org/libtorch/test/cu118/libtorch-cxx11-abi-shared-with-deps-latest.zip"], ) http_archive( name = "libtorch_pre_cxx11_abi", build_file = "@//third_party/libtorch:BUILD", - sha256 = "f3cbd7e9593f0c64b8671d02a21d562c98b60ef1abf5898c0ee9acfbc5a6b5d2", + sha256 = "cc19b398cf435e0e34f347ef90fc11c2a42703998330a9c4a9fb0d2291737df7", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip"], + urls = ["https://download.pytorch.org/libtorch/test/cu118/libtorch-shared-with-deps-latest.zip"], ) #################################################################################### diff --git a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu index 01d4d14bfe..6e1e688b36 100644 --- a/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu +++ b/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu @@ -56,17 +56,17 @@ new_local_repository( http_archive( name = "libtorch", build_file = "@//third_party/libtorch:BUILD", - sha256 = "292b3f81e7c857fc102be93e2e44c40cdb4d8ef03d98121bc6af434c66e8490b", + sha256 = "c5174f18c0866421a5738d389aaea0c02f32a1a5be5f0747dc8dd0d96034c9b0", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcu118.zip"], + urls = ["https://download.pytorch.org/libtorch/test/cu118/libtorch-cxx11-abi-shared-with-deps-latest.zip"], ) http_archive( name = "libtorch_pre_cxx11_abi", build_file = "@//third_party/libtorch:BUILD", 
- sha256 = "f3cbd7e9593f0c64b8671d02a21d562c98b60ef1abf5898c0ee9acfbc5a6b5d2", + sha256 = "cc19b398cf435e0e34f347ef90fc11c2a42703998330a9c4a9fb0d2291737df7", strip_prefix = "libtorch", - urls = ["https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.0.0%2Bcu118.zip"], + urls = ["https://download.pytorch.org/libtorch/test/cu118/libtorch-shared-with-deps-latest.zip"], ) #################################################################################### From d3e4f4cdb9a1960fa63ec904962e3ef668be2cc9 Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Tue, 2 May 2023 19:00:29 -0700 Subject: [PATCH 8/9] Revert "chore: remove aten::all dim tests" This reverts commit e2184bc96a045eb245071dc1e82ccec5cfbc91c6. --- .../conversion/converters/test_reduce.cpp | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/core/conversion/converters/test_reduce.cpp b/tests/core/conversion/converters/test_reduce.cpp index 2e2bb5cbd3..3cdb2d3b84 100644 --- a/tests/core/conversion/converters/test_reduce.cpp +++ b/tests/core/conversion/converters/test_reduce.cpp @@ -348,3 +348,47 @@ TEST(Converters, ATenAnyDimNegIndexConvertsCorrectly) { auto in = at::randint(-2, 2, {2, 32}, at::kCUDA); test_body(graph, in); } + +TEST(Converters, ATenAllDimConvertsCorrectly) { + const auto graph = R"IR( + graph(%0 : Tensor): + %1 : int = prim::Constant[value=-1]() + %3 : bool = prim::Constant[value=0]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::randint(0, 2, {64, 2}, at::kCUDA); + test_body(graph, in); +} + +TEST(Converters, ATenAllDimKeepDimConvertsCorrectly) { + const auto graph = R"IR( + graph(%0 : Tensor): + %1 : int = prim::Constant[value=0]() + %3 : bool = prim::Constant[value=1]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::randint(-2, 2, {2, 32}, at::kCUDA).to(torch::kBool); + test_body(graph, in); +} + +TEST(Converters, ATenAllDimAllTrueConvertsCorrectly) { + const auto graph = R"IR( + graph(%0 : Tensor): + %1 : int = prim::Constant[value=1]() + %3 : bool = prim::Constant[value=0]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::ones({2, 32}, at::kCUDA); + test_body(graph, in); +} + +TEST(Converters, ATenAllDimDynamicConvertsCorrectly) { + const auto graph = R"IR( + graph(%0 : Tensor): + %1 : int = prim::Constant[value=-1]() + %3 : bool = prim::Constant[value=0]() + %5 : Tensor = aten::all(%0, %1, %3) + return (%5))IR"; + auto in = at::randint(0, 2, {64, 2}, at::kCUDA).to(torch::kHalf); + test_body(graph, in, true); +} From e97e0c486770f43677ebf22066e3634e779a8a54 Mon Sep 17 00:00:00 2001 From: George S <113141689+gs-olive@users.noreply.github.com> Date: Wed, 3 May 2023 15:01:39 -0700 Subject: [PATCH 9/9] Update versions.py --- py/versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/versions.py b/py/versions.py index ccfcede1ab..114e4df5bb 100644 --- a/py/versions.py +++ b/py/versions.py @@ -1,4 +1,4 @@ __version__ = "1.4.0.rc0" __cuda_version__ = "11.8" __cudnn_version__ = "8.8" -__tensorrt_version__ = "8.6" \ No newline at end of file +__tensorrt_version__ = "8.6"
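As a usage note for the `--legacy` flag introduced in `py/setup.py` by PATCH 7/9, a sketch of the two build modes follows; the commands assume they are run from the `py/` directory and that the `--release` flag is combined with `--legacy` as the flag-parsing logic above permits:

```bash
# Default build: install_requires pins torch >=2.0.1,<2.1
python3 setup.py bdist_wheel --release

# Legacy build: the --legacy flag relaxes the requirement to torch >=1.13.0,<2.0
python3 setup.py bdist_wheel --release --legacy
```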