From 6fac76c4ad9085d956a1f7cf86ae057a583810b2 Mon Sep 17 00:00:00 2001 From: Simon Branford Date: Thu, 19 Jan 2023 09:31:44 +0000 Subject: [PATCH 1/7] adding easyconfigs: PyTorch-1.13.1-foss-2022a.eb and patches: PyTorch-1.13.1_fix-test-ops-conf.patch, PyTorch-1.13.1_no-cuda-stubs-rpath.patch, PyTorch-1.13.1_remove-flaky-test-in-testnn.patch, PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch --- .../p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 95 +++++++++ .../PyTorch-1.13.1_fix-test-ops-conf.patch | 26 +++ .../PyTorch-1.13.1_no-cuda-stubs-rpath.patch | 186 ++++++++++++++++++ ...h-1.13.1_remove-flaky-test-in-testnn.patch | 21 ++ ...skip-ao-sparsity-test-without-fbgemm.patch | 27 +++ 5 files changed, 355 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-test-ops-conf.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb new file mode 100644 index 00000000000..93d6830c264 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -0,0 +1,95 @@ +name = 'PyTorch' +version = '1.13.1' + +homepage = 'https://pytorch.org/' +description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. +PyTorch is a deep learning framework that puts Python first.""" + +toolchain = {'name': 'foss', 'version': '2022a'} + +source_urls = [GITHUB_RELEASE] +sources = ['%(namelower)s-v%(version)s.tar.gz'] +patches = [ + 'PyTorch-1.7.0_disable-dev-shm-test.patch', + 'PyTorch-1.10.0_fix-kineto-crash.patch', + 'PyTorch-1.11.0_fix-fsdp-fp16-test.patch', + 'PyTorch-1.11.1_skip-test_init_from_local_shards.patch', + 'PyTorch-1.12.1_fix-skip-decorators.patch', + 'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch', + 'PyTorch-1.12.1_fix-test_wishart_log_prob.patch', + 'PyTorch-1.12.1_fix-TestTorch.test_to.patch', + 'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch', + 'PyTorch-1.12.1_fix-vsx-vector-funcs.patch', + 'PyTorch-1.12.1_fix-vsx-loadu.patch', + 'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch', + 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch', + 'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch', + 'PyTorch-1.13.1_fix-test-ops-conf.patch', + 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch', +] +checksums = [ + {'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'}, + {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'}, + {'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'}, + {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'}, + {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch': + '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'}, + {'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'}, + {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch': + '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'}, + {'PyTorch-1.12.1_fix-test_wishart_log_prob.patch': + 'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45'}, + {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'}, + {'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch': + '0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'}, + {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'}, + {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'}, + {'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch': + '1435fcac3234edc865479199673b902eb67f6a2bd046af7d731141f03594666d'}, + {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch': + 'ad8db280c1acb5fade65646097590c6b332a9caf722191e4a3ddba2fb945ee6d'}, + {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch': + 'ea391298c4d9984de87cc14da9740ff09137b4a832fafd9e60c576f81690e8ec'}, + {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'}, + {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '1a8f712e474f64da878b9328ce05e7245afcba1765cbc27fb5f4f16f733ea175'}, +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +builddependencies = [ + ('CMake', '3.23.1'), + ('hypothesis', '6.46.7'), +] + +dependencies = [ + ('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions + ('Python', '3.10.4'), + ('protobuf', '3.19.4'), + ('protobuf-python', '3.19.4'), + ('pybind11', '2.9.2'), + ('SciPy-bundle', '2022.05'), + ('PyYAML', '6.0'), + ('MPFR', '4.1.0'), + ('GMP', '6.2.1'), + ('numactl', '2.0.14'), + ('FFmpeg', '4.4.2'), + ('Pillow', '9.1.1'), + ('expecttest', '0.1.3'), +] + +excluded_tests = { + '': [ + # This test seems to take too long on NVIDIA Ampere at least. + 'distributed/test_distributed_spawn', + # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375 + 'distributions/test_constraints', + # no xdoctest + 'doctests', + ] +} + +runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s' + +tests = ['PyTorch-check-cpp-extension.py'] + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-test-ops-conf.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-test-ops-conf.patch new file mode 100644 index 00000000000..d30746dab4d --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-test-ops-conf.patch @@ -0,0 +1,26 @@ +From 8581301957b0018a32433f85163535709bc9d332 Mon Sep 17 00:00:00 2001 +From: Masaki Kozuki +Date: Fri, 7 Oct 2022 21:25:07 -0700 +Subject: [PATCH] try using a different group name + +ref: +https://github.com/pytorch/pytorch/issues/85923#issuecomment-1272220271 + +Signed-off-by: Masaki Kozuki +--- + functorch/test/conftest.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/functorch/test/conftest.py b/functorch/test/conftest.py +index d2e929a9a58db..afc39d9f35de9 100644 +--- a/functorch/test/conftest.py ++++ b/functorch/test/conftest.py +@@ -17,7 +17,7 @@ + + + def pytest_addoption(parser: Parser) -> None: +- group = parser.getgroup("terminal reporting") ++ group = parser.getgroup("terminal reporting functorch") + group.addoption( + "--junit-xml-reruns", + action="store", diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch new file mode 100644 index 00000000000..772677f267a --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch @@ -0,0 +1,186 @@ +# PyTorch's CMAKE configuration by default sets RUNPATH on libraries if they link other libraries +# that are outside the build tree, which is done because of the CMAKE config on +# https://github.com/pytorch/pytorch/blob/v1.10.0/cmake/Dependencies.cmake#L10. +# This provides problems, since the cuda stubs library path then also gets added to the RUNPATH. +# As a result, at runtime, the stub version of things like libcuda.so.1 gets picked up, instead of the real drivers +# See https://github.com/easybuilders/easybuild-easyconfigs/issues/14359 +# This line https://github.com/pytorch/pytorch/blob/v1.10.0/cmake/Dependencies.cmake#L16 +# Makes sure that any path that is linked, is also added to the RUNPATH. +# This has been reported upstream in https://github.com/pytorch/pytorch/issues/35418 +# and a fix was attempted in https://github.com/pytorch/pytorch/pull/37737 but it was reverted +# +# This EasyBuild patch changes behavior for the libraries that were failing, i.e. the ones in this list: +# https://github.com/easybuilders/easybuild-easyconfigs/issues/14359#issuecomment-970479904 +# This is done by setting INSTALL_RPATH_USE_LINK_PATH to false, and instead, specifying the RPATH +# explicitely by defining INSTALL_RPATH, but only adding directories that do not match to the "stubs" regex +# +# Original patch: Caspar van Leeuwen +# Updated: Alexander Grund (TU Dresden) +# Updated: Simon Branford (University of Birmingham) +# +# See https://github.com/pytorch/pytorch/pull/87593 + +diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt +index 4182797fc78e5..265bf4f660896 100644 +--- a/caffe2/CMakeLists.txt ++++ b/caffe2/CMakeLists.txt +@@ -631,13 +631,12 @@ endif() + if(USE_CUDA) + list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS}) + add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS}) ++ include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake) ++ link_cuda_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB}) + if(MSVC) + # Delay load nvcuda.dll so we can import torch compiled with cuda on a CPU-only machine +- set(DELAY_LOAD_FLAGS "-DELAYLOAD:nvcuda.dll;delayimp.lib") +- else() +- set(DELAY_LOAD_FLAGS "") ++ target_link_libraries(caffe2_nvrtc "-DELAYLOAD:nvcuda.dll;delayimp.lib") + endif() +- target_link_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} ${DELAY_LOAD_FLAGS}) + target_include_directories(caffe2_nvrtc PRIVATE ${CUDA_INCLUDE_DIRS}) + install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}") + if(USE_NCCL) +diff --git a/cmake/LinkCudaLibraries.cmake b/cmake/LinkCudaLibraries.cmake +new file mode 100644 +index 0000000000000..005914ccc6f7c +--- /dev/null ++++ b/cmake/LinkCudaLibraries.cmake +@@ -0,0 +1,33 @@ ++# Link CUDA libraries to the given target, i.e.: `target_link_libraries(target )` ++# ++# Additionally makes sure CUDA stub libs don't end up being in RPath ++# ++# Example: link_cuda_libraries(mytarget PRIVATE ${CUDA_LIBRARIES}) ++function(link_cuda_libraries target) ++ set(libs ${ARGN}) ++ set(install_rpath "$ORIGIN") ++ set(filtered FALSE) ++ foreach(lib IN LISTS libs) ++ # CUDA stub libs are in form /prefix/lib/stubs/libcuda.so ++ # So extract the name of the parent folder, to check against "stubs" ++ # And the parent path which we need to add to the INSTALL_RPATH for non-stubs ++ get_filename_component(parent_path "${lib}" DIRECTORY) ++ get_filename_component(parent_name "${parent_path}" NAME) ++ if(parent_name STREQUAL "stubs") ++ message(STATUS "Filtering ${lib} from being set in ${target}'s RPATH, " ++ "because it appears to point to the CUDA stubs directory.") ++ set(filtered TRUE) ++ elseif(parent_path) ++ list(APPEND install_rpath ${parent_path}) ++ endif() ++ endforeach() ++ ++ # Regular link command ++ target_link_libraries(${target} ${scope} ${libs}) ++ # Manually set INSTALL_RPATH when there were any stub libs ++ if(filtered) ++ list(REMOVE_DUPLICATES install_rpath) ++ set_target_properties(${target} PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE) ++ set_target_properties(${target} PROPERTIES INSTALL_RPATH "${install_rpath}") ++ endif() ++endfunction() +diff --git a/test/cpp/api/CMakeLists.txt b/test/cpp/api/CMakeLists.txt +index 6b801a0731827..6ac92870479e0 100644 +--- a/test/cpp/api/CMakeLists.txt ++++ b/test/cpp/api/CMakeLists.txt +@@ -54,7 +54,8 @@ if(NOT MSVC) + endif() + + if(USE_CUDA) +- target_link_libraries(test_api PRIVATE ++ include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake) ++ link_cuda_libraries(test_api PRIVATE + ${CUDA_LIBRARIES} + ${CUDA_NVRTC_LIB} + ${CUDA_CUDA_LIB} +diff --git a/test/cpp/dist_autograd/CMakeLists.txt b/test/cpp/dist_autograd/CMakeLists.txt +index 9969c63e16d57..356ba5be55c4e 100644 +--- a/test/cpp/dist_autograd/CMakeLists.txt ++++ b/test/cpp/dist_autograd/CMakeLists.txt +@@ -10,7 +10,8 @@ if(USE_DISTRIBUTED AND NOT WIN32) + target_link_libraries(test_dist_autograd PRIVATE torch gtest) + + if(USE_CUDA) +- target_link_libraries(test_dist_autograd PRIVATE ++ include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake) ++ link_cuda_libraries(test_dist_autograd PRIVATE + ${CUDA_LIBRARIES} + ${CUDA_NVRTC_LIB} + ${CUDA_CUDA_LIB} +diff --git a/test/cpp/jit/CMakeLists.txt b/test/cpp/jit/CMakeLists.txt +index b8b765a68d8b4..aba9c8c6c3e17 100644 +--- a/test/cpp/jit/CMakeLists.txt ++++ b/test/cpp/jit/CMakeLists.txt +@@ -156,7 +156,8 @@ if(LINUX) + endif() + + if(USE_CUDA) +- target_link_libraries(test_jit PRIVATE ++ include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake) ++ link_cuda_libraries(test_jit PRIVATE + ${CUDA_LIBRARIES} + ${CUDA_NVRTC_LIB} + ${CUDA_CUDA_LIB} +diff --git a/test/cpp/rpc/CMakeLists.txt b/test/cpp/rpc/CMakeLists.txt +index 3997f8753e555..21fddbc645d0d 100644 +--- a/test/cpp/rpc/CMakeLists.txt ++++ b/test/cpp/rpc/CMakeLists.txt +@@ -33,7 +33,8 @@ target_include_directories( + target_link_libraries(test_cpp_rpc PRIVATE ${TORCH_RPC_TEST_DEPENDENCY_LIBS}) + + if(USE_CUDA) +- target_link_libraries(test_cpp_rpc PRIVATE ++ include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake) ++ link_cuda_libraries(test_cpp_rpc PRIVATE + ${CUDA_LIBRARIES} + ${CUDA_NVRTC_LIB} + ${CUDA_CUDA_LIB} +diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt +index 7dff70630d3ec..ecb83005492f5 100644 +--- a/test/cpp/tensorexpr/CMakeLists.txt ++++ b/test/cpp/tensorexpr/CMakeLists.txt +@@ -57,14 +57,15 @@ if(USE_PTHREADPOOL) + target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface) + endif() + if(USE_CUDA) +- target_link_libraries(test_tensorexpr PRIVATE ++ include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake) ++ link_cuda_libraries(test_tensorexpr PRIVATE + ${CUDA_LIBRARIES} + ${CUDA_NVRTC_LIB} + ${CUDA_CUDA_LIB} + ${TORCH_CUDA_LIBRARIES}) + target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA) + +- target_link_libraries(tutorial_tensorexpr PRIVATE ++ link_cuda_libraries(tutorial_tensorexpr PRIVATE + ${CUDA_LIBRARIES} + ${CUDA_NVRTC_LIB} + ${CUDA_CUDA_LIB} +diff --git a/test/test_torch.py b/test/test_torch.py +index 31759213ecefc..67ebc0420f38a 100644 +--- a/test/test_torch.py ++++ b/test/test_torch.py +@@ -8581,6 +8581,21 @@ def add_neg_dim_tests(): + assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name + setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim)) + ++class TestRPATH(TestCase): ++ @unittest.skipIf(not sys.platform.startswith('linux'), "linux-only test") ++ def test_rpath(self): ++ """ ++ Make sure RPATH (or RUNPATH) in nvrtc does not contain a cuda stubs directory ++ issue gh-35418 ++ """ ++ libdir = os.path.join(os.path.dirname(torch._C.__file__), 'lib') ++ caffe2_nvrtc = os.path.join(libdir, 'libcaffe2_nvrtc.so') ++ if os.path.exists(caffe2_nvrtc): ++ output = subprocess.check_output(['objdump', '-x', caffe2_nvrtc]) ++ for line in output.split(b'\n'): ++ if b'RPATH' in line or b'RUNPATH' in line: ++ self.assertFalse(b'stubs' in line) ++ + # TODO: these empy classes are temporarily instantiated for XLA compatibility + # once XLA updates their test suite it should be removed + class TestViewOps(TestCase): diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch new file mode 100644 index 00000000000..7a76b5fe01e --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch @@ -0,0 +1,21 @@ +This test is flaky when run with the type float16. +I.e. TestNNDeviceTypeCPU.test_embedding_bag_device_cpu_int32_int32_float16 & + TestNNDeviceTypeCPU.test_embedding_bag_device_cpu_int64_int64_float16 +fail. +See https://github.com/pytorch/pytorch/issues/86638 +So remove the half precision test. + +Author: Simon Branford (University of Birmingham) +Based off 1.12.1 patch by Alexander Grund (TU Dresden) + +--- a/test/nn/test_embedding.py ++++ b/test/nn/test_embedding.py +@@ -18236,7 +18236,7 @@ class TestNNDeviceType(NNTestCase): + self.assertRaises(RuntimeError, lambda: es(input.view(-1), offset)) + + @skipMeta +- @dtypes(*itertools.product((torch.int, torch.long), (torch.int, torch.long), (torch.float, torch.double, torch.half))) ++ @dtypes(*itertools.product((torch.int, torch.long), (torch.int, torch.long), (torch.float, torch.double))) + def test_embedding_bag_device(self, device, dtypes): + with set_default_dtype(torch.double): + self._test_EmbeddingBag(device, 'sum', False, wdtype=dtypes[2], dtype=dtypes[0], odtype=dtypes[1]) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch new file mode 100644 index 00000000000..fe5d7b06a51 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch @@ -0,0 +1,27 @@ +Those tests (from test_ao_sparsity) require FBGEMM which may not be available. +So add the skip decorator. +See https://github.com/pytorch/pytorch/issues/87364 + +Author: Simon Branford (University of Birmingham) +Based off 1.12.1 patch by Alexander Grund (TU Dresden) + +diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py +index b44c885507..b7d35343c0 100644 +--- a/test/ao/sparsity/test_composability.py ++++ b/test/ao/sparsity/test_composability.py +@@ -9,6 +9,7 @@ import torch.ao.quantization as tq + from torch import nn + from torch.ao import sparsity + from torch.testing._internal.common_utils import TestCase ++from torch.testing._internal.common_quantization import skipIfNoFBGEMM + from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx, convert_to_reference_fx, prepare_qat_fx + from torch.ao.sparsity import fqn_to_module + +@@ -23,6 +24,7 @@ sparse_defaults = { + # This series of tests are to check the composability goals for sparsity and quantization. Namely + # that performing quantization and sparsity model manipulations in various orderings + # does not cause problems ++@skipIfNoFBGEMM + class TestComposability(TestCase): + def _get_model_and_sparsifier_and_sparse_config(self, qconfig=None): + model = nn.Sequential( From 3f6e57dbae734d90a094473e2eb730c7b731aaa7 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 10 Feb 2023 11:43:39 +0100 Subject: [PATCH 2/7] Update patches based on PyTorch 1.13.1 --- .../p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 12 +++++------ .../PyTorch-1.13.1_no-cuda-stubs-rpath.patch | 21 +++++++++---------- ...h-1.13.1_remove-flaky-test-in-testnn.patch | 9 ++++---- ...skip-ao-sparsity-test-without-fbgemm.patch | 11 +++++----- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb index 93d6830c264..151fcf65d7c 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -22,10 +22,10 @@ patches = [ 'PyTorch-1.12.1_fix-vsx-vector-funcs.patch', 'PyTorch-1.12.1_fix-vsx-loadu.patch', 'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch', - 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch', - 'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch', 'PyTorch-1.13.1_fix-test-ops-conf.patch', 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch', + 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch', + 'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch', ] checksums = [ {'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'}, @@ -46,12 +46,12 @@ checksums = [ {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'}, {'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch': '1435fcac3234edc865479199673b902eb67f6a2bd046af7d731141f03594666d'}, + {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'}, + {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'}, {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch': - 'ad8db280c1acb5fade65646097590c6b332a9caf722191e4a3ddba2fb945ee6d'}, + 'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'}, {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch': - 'ea391298c4d9984de87cc14da9740ff09137b4a832fafd9e60c576f81690e8ec'}, - {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'}, - {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '1a8f712e474f64da878b9328ce05e7245afcba1765cbc27fb5f4f16f733ea175'}, + '92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'}, ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch index 772677f267a..be2335491ea 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_no-cuda-stubs-rpath.patch @@ -16,12 +16,11 @@ # # Original patch: Caspar van Leeuwen # Updated: Alexander Grund (TU Dresden) -# Updated: Simon Branford (University of Birmingham) # # See https://github.com/pytorch/pytorch/pull/87593 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 4182797fc78e5..265bf4f660896 100644 +index 9074b848411..1d45807189b 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -631,13 +631,12 @@ endif() @@ -43,7 +42,7 @@ index 4182797fc78e5..265bf4f660896 100644 if(USE_NCCL) diff --git a/cmake/LinkCudaLibraries.cmake b/cmake/LinkCudaLibraries.cmake new file mode 100644 -index 0000000000000..005914ccc6f7c +index 00000000000..005914ccc6f --- /dev/null +++ b/cmake/LinkCudaLibraries.cmake @@ -0,0 +1,33 @@ @@ -81,7 +80,7 @@ index 0000000000000..005914ccc6f7c + endif() +endfunction() diff --git a/test/cpp/api/CMakeLists.txt b/test/cpp/api/CMakeLists.txt -index 6b801a0731827..6ac92870479e0 100644 +index 6b801a07318..6ac92870479 100644 --- a/test/cpp/api/CMakeLists.txt +++ b/test/cpp/api/CMakeLists.txt @@ -54,7 +54,8 @@ if(NOT MSVC) @@ -95,7 +94,7 @@ index 6b801a0731827..6ac92870479e0 100644 ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} diff --git a/test/cpp/dist_autograd/CMakeLists.txt b/test/cpp/dist_autograd/CMakeLists.txt -index 9969c63e16d57..356ba5be55c4e 100644 +index 9969c63e16d..356ba5be55c 100644 --- a/test/cpp/dist_autograd/CMakeLists.txt +++ b/test/cpp/dist_autograd/CMakeLists.txt @@ -10,7 +10,8 @@ if(USE_DISTRIBUTED AND NOT WIN32) @@ -109,10 +108,10 @@ index 9969c63e16d57..356ba5be55c4e 100644 ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} diff --git a/test/cpp/jit/CMakeLists.txt b/test/cpp/jit/CMakeLists.txt -index b8b765a68d8b4..aba9c8c6c3e17 100644 +index 66a60fb01ca..005e18183d2 100644 --- a/test/cpp/jit/CMakeLists.txt +++ b/test/cpp/jit/CMakeLists.txt -@@ -156,7 +156,8 @@ if(LINUX) +@@ -148,7 +148,8 @@ if(LINUX) endif() if(USE_CUDA) @@ -123,7 +122,7 @@ index b8b765a68d8b4..aba9c8c6c3e17 100644 ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} diff --git a/test/cpp/rpc/CMakeLists.txt b/test/cpp/rpc/CMakeLists.txt -index 3997f8753e555..21fddbc645d0d 100644 +index 3997f8753e5..21fddbc645d 100644 --- a/test/cpp/rpc/CMakeLists.txt +++ b/test/cpp/rpc/CMakeLists.txt @@ -33,7 +33,8 @@ target_include_directories( @@ -137,7 +136,7 @@ index 3997f8753e555..21fddbc645d0d 100644 ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt -index 7dff70630d3ec..ecb83005492f5 100644 +index 7dff70630d3..ecb83005492 100644 --- a/test/cpp/tensorexpr/CMakeLists.txt +++ b/test/cpp/tensorexpr/CMakeLists.txt @@ -57,14 +57,15 @@ if(USE_PTHREADPOOL) @@ -159,10 +158,10 @@ index 7dff70630d3ec..ecb83005492f5 100644 ${CUDA_NVRTC_LIB} ${CUDA_CUDA_LIB} diff --git a/test/test_torch.py b/test/test_torch.py -index 31759213ecefc..67ebc0420f38a 100644 +index 8de5b822d00..fce7b5714f1 100644 --- a/test/test_torch.py +++ b/test/test_torch.py -@@ -8581,6 +8581,21 @@ def add_neg_dim_tests(): +@@ -8414,6 +8414,21 @@ def add_neg_dim_tests(): assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim)) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch index 7a76b5fe01e..c95d2227258 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_remove-flaky-test-in-testnn.patch @@ -5,12 +5,13 @@ fail. See https://github.com/pytorch/pytorch/issues/86638 So remove the half precision test. -Author: Simon Branford (University of Birmingham) -Based off 1.12.1 patch by Alexander Grund (TU Dresden) +Author: Alexander Grund (TU Dresden) +diff --git a/test/nn/test_embedding.py b/test/nn/test_embedding.py +index f76e01c65c5..6b5de2b1059 100644 --- a/test/nn/test_embedding.py +++ b/test/nn/test_embedding.py -@@ -18236,7 +18236,7 @@ class TestNNDeviceType(NNTestCase): +@@ -1108,7 +1108,7 @@ class TestEmbeddingNNDeviceType(NNTestCase): self.assertRaises(RuntimeError, lambda: es(input.view(-1), offset)) @skipMeta @@ -18,4 +19,4 @@ Based off 1.12.1 patch by Alexander Grund (TU Dresden) + @dtypes(*itertools.product((torch.int, torch.long), (torch.int, torch.long), (torch.float, torch.double))) def test_embedding_bag_device(self, device, dtypes): with set_default_dtype(torch.double): - self._test_EmbeddingBag(device, 'sum', False, wdtype=dtypes[2], dtype=dtypes[0], odtype=dtypes[1]) + self._test_EmbeddingBag(device, 'sum', False, wdtype=dtypes[2], dtype=dtypes[0], odtype=dtypes[1]) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch index fe5d7b06a51..481b013de21 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch @@ -2,11 +2,10 @@ Those tests (from test_ao_sparsity) require FBGEMM which may not be available. So add the skip decorator. See https://github.com/pytorch/pytorch/issues/87364 -Author: Simon Branford (University of Birmingham) -Based off 1.12.1 patch by Alexander Grund (TU Dresden) +Author: Alexander Grund (TU Dresden) diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py -index b44c885507..b7d35343c0 100644 +index 6a1b6067a4c..b2eed72e3e3 100644 --- a/test/ao/sparsity/test_composability.py +++ b/test/ao/sparsity/test_composability.py @@ -9,6 +9,7 @@ import torch.ao.quantization as tq @@ -17,11 +16,11 @@ index b44c885507..b7d35343c0 100644 from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx, convert_to_reference_fx, prepare_qat_fx from torch.ao.sparsity import fqn_to_module -@@ -23,6 +24,7 @@ sparse_defaults = { +@@ -62,6 +63,7 @@ def _calculate_sparsity(tensor): # This series of tests are to check the composability goals for sparsity and quantization. Namely # that performing quantization and sparsity model manipulations in various orderings # does not cause problems +@skipIfNoFBGEMM class TestComposability(TestCase): - def _get_model_and_sparsifier_and_sparse_config(self, qconfig=None): - model = nn.Sequential( + # This test checks whether performing quantization prepare before sparse prepare + # causes any issues and verifies that the correct observers are inserted and that From 5f52236df3afcd51c152dce775072a90b04ed963 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 10 Feb 2023 14:36:26 +0100 Subject: [PATCH 3/7] Fix test_ops* startup failures Those tests require 2 pytest plugins and a bugfix. --- .../p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 6 ++++ .../PyTorch-1.13.1_fix-pytest-args.patch | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-pytest-args.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb index 151fcf65d7c..c3837e6e707 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -22,6 +22,7 @@ patches = [ 'PyTorch-1.12.1_fix-vsx-vector-funcs.patch', 'PyTorch-1.12.1_fix-vsx-loadu.patch', 'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch', + 'PyTorch-1.13.1_fix-pytest-args.patch', 'PyTorch-1.13.1_fix-test-ops-conf.patch', 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch', 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch', @@ -46,6 +47,8 @@ checksums = [ {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'}, {'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch': '1435fcac3234edc865479199673b902eb67f6a2bd046af7d731141f03594666d'}, + {'PyTorch-1.13.1_fix-pytest-args.patch': + 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'}, {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'}, {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'}, {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch': @@ -59,6 +62,9 @@ osdependencies = [OS_PKG_IBVERBS_DEV] builddependencies = [ ('CMake', '3.23.1'), ('hypothesis', '6.46.7'), + # For tests + ('pytest-rerunfailures', '11.1'), + ('pytest-shard', '0.1.2'), ] dependencies = [ diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-pytest-args.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-pytest-args.patch new file mode 100644 index 00000000000..f89df575837 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-pytest-args.patch @@ -0,0 +1,28 @@ +As we don't set `--save-xml` pytest is called without arguments causing it to try to discover ALL tests. +This leads to massive failures in e.g. `test_ops*` where `--use-pytest` is used by the tests. +See https://github.com/pytorch/pytorch/pull/94589 + +Author: Alexander Grund (TU Dresden) + +diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py +index e32850908d4..e63c6f2a392 100644 +--- a/torch/testing/_internal/common_utils.py ++++ b/torch/testing/_internal/common_utils.py +@@ -737,14 +737,16 @@ def run_tests(argv=UNITTEST_ARGS): + failed |= wait_for_process(p) != 0 + assert not failed, "Some test shards have failed" + elif USE_PYTEST: ++ pytest_args = argv + if TEST_SAVE_XML: + test_report_path = get_report_path(pytest=True) + print(f'Test results will be stored in {test_report_path}') ++ pytest_args = pytest_args + [f'--junit-xml-reruns={test_report_path}'] + + import pytest + os.environ["NO_COLOR"] = "1" + os.environ["USING_PYTEST"] = "1" +- exit_code = pytest.main(args=argv + [f'--junit-xml-reruns={test_report_path}'] if TEST_SAVE_XML else []) ++ exit_code = pytest.main(args=pytest_args) + del os.environ["USING_PYTEST"] + if TEST_SAVE_XML: + sanitize_pytest_xml(test_report_path) From 6124d4caf74f2d2a9f1a936586528d900e5d7324 Mon Sep 17 00:00:00 2001 From: Simon Branford Date: Sat, 11 Feb 2023 09:38:05 +0000 Subject: [PATCH 4/7] more patches --- .../p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 9 +++++ ...h-1.13.1_increase-tolerance-test_ops.patch | 21 +++++++++++ ...Torch-1.13.1_install-vsx-vec-headers.patch | 35 +++++++++++++++++++ ...yTorch-1.13.1_skip-failing-grad-test.patch | 25 +++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_install-vsx-vec-headers.patch create mode 100644 easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-failing-grad-test.patch diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb index c3837e6e707..8a737d028c0 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -27,6 +27,9 @@ patches = [ 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch', 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch', 'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch', + 'PyTorch-1.13.1_increase-tolerance-test_ops.patch', + 'PyTorch-1.13.1_install-vsx-vec-headers.patch', + 'PyTorch-1.13.1_skip-failing-grad-test.patch', ] checksums = [ {'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'}, @@ -55,6 +58,12 @@ checksums = [ 'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'}, {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch': '92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'}, + {'PyTorch-1.13.1_increase-tolerance-test_ops.patch': + 'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'}, + {'PyTorch-1.13.1_install-vsx-vec-headers.patch': + '7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'}, + {'PyTorch-1.13.1_skip-failing-grad-test.patch': + '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'} ] osdependencies = [OS_PKG_IBVERBS_DEV] diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch new file mode 100644 index 00000000000..fb28aadbefa --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch @@ -0,0 +1,21 @@ +`test_out` may fail due to slightly different values caused by different order of matrizes in SGEMM: + +> Mismatched elements: 1 / 50 (2.0%) +> Greatest absolute difference: 1.430511474609375e-05 at index (4, 5) (up to 1e-05 allowed) +> Greatest relative difference: 4.65393206065873e-06 at index (4, 5) (up to 1.3e-06 allowed) + +Author: Alexander Grund (TU Dresden) +Updated for PyTorch 1.13.1: Simon Branford (University of Birmingham) + +--- a/test/test_ops.py ++++ b/test/test_ops.py +@@ -545,6 +545,9 @@ + else list(supported_dtypes)[0] + ) + ++ if dtype is torch.float32: ++ self.precision, self.rel_tol = (1.5e-05, 1e-05) ++ + samples = op.sample_inputs(device, dtype) + for sample in samples: + # calls it normally to get the expected result diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_install-vsx-vec-headers.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_install-vsx-vec-headers.patch new file mode 100644 index 00000000000..1f6150a5cb0 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_install-vsx-vec-headers.patch @@ -0,0 +1,35 @@ +Add missing headers to the installation which fixes e.g. test_cpp_extensions_aot_ninja +See https://github.com/pytorch/pytorch/pull/85547 + +Author: Alexander Grund (TU Dresden) +Updated for PyTorch 1.13.1: Simon Branford (University of Birmingham) + +--- a/aten/src/ATen/CMakeLists.txt ++++ b/aten/src/ATen/CMakeLists.txt +@@ -56,7 +56,7 @@ + EXCLUDE(ATen_CORE_TEST_SRCS "${ATen_CORE_TEST_SRCS}" ${ATen_CORE_EXCLUDED_TEST_SRCS}) + endif() + +-file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h") ++file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/vec256/vsx/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h") + file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp") + file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh") + file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp") +--- a/setup.py ++++ b/setup.py +@@ -1031,6 +1031,7 @@ + 'include/ATen/*.h', + 'include/ATen/cpu/*.h', + 'include/ATen/cpu/vec/vec256/*.h', ++ 'include/ATen/cpu/vec/vec256/vsx/*.h', + 'include/ATen/cpu/vec/vec512/*.h', + 'include/ATen/cpu/vec/*.h', + 'include/ATen/core/*.h', +@@ -1138,6 +1139,7 @@ + 'include/THH/*.cuh', + 'include/THH/*.h*', + 'include/THH/generic/*.h', ++ 'include/sleef.h', + 'share/cmake/ATen/*.cmake', + 'share/cmake/Caffe2/*.cmake', + 'share/cmake/Caffe2/public/*.cmake', diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-failing-grad-test.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-failing-grad-test.patch new file mode 100644 index 00000000000..13ab1717e00 --- /dev/null +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-failing-grad-test.patch @@ -0,0 +1,25 @@ +Skip test_forward_mode_AD_nn_functional_max_unpool2d_cpu_float64 & test_forward_mode_AD_nn_functional_max_unpool3d_cpu_float64 +which may unexpectably succeed. + +Author: Simon Branford (University of Birmingham) + +--- a/torch/testing/_internal/common_methods_invocations.py ++++ b/torch/testing/_internal/common_methods_invocations.py +@@ -11574,7 +11574,7 @@ + # and if there are several indices pointing to the same memory, + # gradcheck is oblivious about that and cannot perturb them all at once + # (see sample_inputs_max_unpool_grad to find out more). +- DecorateInfo(unittest.expectedFailure, 'TestGradients', 'test_forward_mode_AD'), ++ DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_forward_mode_AD'), + DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_fn_gradgrad'), + DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_fn_grad'), + DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_forward_ad'), +@@ -11611,7 +11611,7 @@ + # and if there are several indices pointing to the same memory, + # gradcheck is oblivious about that and cannot perturb them all at once + # (see sample_inputs_max_unpool_grad to find out more). +- DecorateInfo(unittest.expectedFailure, 'TestGradients', 'test_forward_mode_AD'), ++ DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_forward_mode_AD'), + DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_fn_gradgrad'), + DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_fn_grad'), + DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad'), From 8cd3bc9ac7304dc26e85afeaf1f2c35026bafa3c Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 13 Apr 2023 18:06:58 +0200 Subject: [PATCH 5/7] skip test_native_mha for PyTorch 1.13.1, which fails on Broadwell systems --- easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb index 8a737d028c0..5f15c785eb3 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -100,6 +100,9 @@ excluded_tests = { 'distributions/test_constraints', # no xdoctest 'doctests', + # failing on broadwell + # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 + 'test_native_mha', ] } From 52cbf0e64ab4be9e4aa7baf54080e2656f0c2697 Mon Sep 17 00:00:00 2001 From: Simon Branford <4967+branfosj@users.noreply.github.com> Date: Fri, 14 Apr 2023 08:54:06 +0100 Subject: [PATCH 6/7] skip distributed/rpc/test_tensorpipe_agent --- easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb index 5f15c785eb3..af5f58752a4 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -103,6 +103,9 @@ excluded_tests = { # failing on broadwell # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 'test_native_mha', + # intermittent failures on various systems + # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 + 'distributed/rpc/test_tensorpipe_agent', ] } From 233902d1a348f633adf1e0ccee709b7c2bd31de6 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Wed, 5 Jul 2023 19:17:20 +0200 Subject: [PATCH 7/7] add extra patch files for PyTorch 1.13.1 to fix failing tests --- .../p/PyTorch/PyTorch-1.13.1-foss-2022a.eb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb index af5f58752a4..b6a1e3ca5b2 100644 --- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb +++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb @@ -14,6 +14,7 @@ patches = [ 'PyTorch-1.10.0_fix-kineto-crash.patch', 'PyTorch-1.11.0_fix-fsdp-fp16-test.patch', 'PyTorch-1.11.1_skip-test_init_from_local_shards.patch', + 'PyTorch-1.12.1_add-hypothesis-suppression.patch', 'PyTorch-1.12.1_fix-skip-decorators.patch', 'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch', 'PyTorch-1.12.1_fix-test_wishart_log_prob.patch', @@ -21,7 +22,7 @@ patches = [ 'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch', 'PyTorch-1.12.1_fix-vsx-vector-funcs.patch', 'PyTorch-1.12.1_fix-vsx-loadu.patch', - 'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch', + 'PyTorch-1.12.1_skip-test_round_robin.patch', 'PyTorch-1.13.1_fix-pytest-args.patch', 'PyTorch-1.13.1_fix-test-ops-conf.patch', 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch', @@ -38,6 +39,8 @@ checksums = [ {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'}, {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch': '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'}, + {'PyTorch-1.12.1_add-hypothesis-suppression.patch': + 'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, {'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'}, {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch': '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'}, @@ -48,10 +51,8 @@ checksums = [ '0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'}, {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'}, {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'}, - {'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch': - '1435fcac3234edc865479199673b902eb67f6a2bd046af7d731141f03594666d'}, - {'PyTorch-1.13.1_fix-pytest-args.patch': - 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'}, + {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'}, + {'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'}, {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'}, {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'}, {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch': @@ -62,8 +63,7 @@ checksums = [ 'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'}, {'PyTorch-1.13.1_install-vsx-vec-headers.patch': '7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'}, - {'PyTorch-1.13.1_skip-failing-grad-test.patch': - '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'} + {'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'}, ] osdependencies = [OS_PKG_IBVERBS_DEV]