Skip to content

Commit

Permalink
Merge pull request #17155 from branfosj/20230119093142_new_pr_PyTorch…
Browse files Browse the repository at this point in the history
…1131

{ai}[foss/2022a] PyTorch v1.13.1 w/ Python 3.10.4
  • Loading branch information
boegel authored Jul 6, 2023
2 parents 85ec6fd + 233902d commit cdc3499
Show file tree
Hide file tree
Showing 9 changed files with 484 additions and 0 deletions.
116 changes: 116 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# EasyBuild easyconfig: PyTorch 1.13.1 built with the foss/2022a toolchain.
name = 'PyTorch'
version = '1.13.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022a'}

# NOTE(review): GITHUB_RELEASE is not defined in this file; presumably a constant
# supplied by EasyBuild that points at the project's GitHub release downloads - confirm.
source_urls = [GITHUB_RELEASE]
# With name 'PyTorch' and version '1.13.1' the template below corresponds to
# 'pytorch-v1.13.1.tar.gz', the tarball name used as the first key in `checksums`.
sources = ['%(namelower)s-v%(version)s.tar.gz']
# Patch files applied on top of the release tarball.
# `checksums` lists one digest per entry here, in the same order (after the tarball),
# so any addition, removal or reordering must be mirrored there.
# The version in each file name appears to be the PyTorch version the patch was
# originally written for - TODO confirm; several pre-date 1.13.1.
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.10.0_fix-kineto-crash.patch',
'PyTorch-1.11.0_fix-fsdp-fp16-test.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-skip-decorators.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
'PyTorch-1.12.1_fix-vsx-loadu.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
'PyTorch-1.13.1_fix-pytest-args.patch',
'PyTorch-1.13.1_fix-test-ops-conf.patch',
'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch',
'PyTorch-1.13.1_increase-tolerance-test_ops.patch',
'PyTorch-1.13.1_install-vsx-vec-headers.patch',
'PyTorch-1.13.1_skip-failing-grad-test.patch',
]
# One {file name: digest} mapping per downloaded/applied file: the source tarball
# first, then every entry of `patches` in the same order.
# The digests are 64 hex characters long (presumably SHA256 - confirm).
# Editing a patch file's content, including its descriptive header, changes its
# digest, so the matching entry here has to be regenerated.
checksums = [
{'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
{'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
{'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'},
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
{'PyTorch-1.12.1_fix-test_wishart_log_prob.patch':
'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45'},
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
'0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
{'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
{'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
{'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'},
{'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'},
{'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
{'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
{'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch':
'92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'},
{'PyTorch-1.13.1_increase-tolerance-test_ops.patch':
'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'},
{'PyTorch-1.13.1_install-vsx-vec-headers.patch':
'7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'},
{'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'},
]

# NOTE(review): OS_PKG_IBVERBS_DEV is not defined in this file; presumably an
# EasyBuild constant naming the operating-system ibverbs development package - confirm.
osdependencies = [OS_PKG_IBVERBS_DEV]

# (name, version) pairs needed only while building and testing.
builddependencies = [
('CMake', '3.23.1'),
('hypothesis', '6.46.7'),
# For tests
('pytest-rerunfailures', '11.1'),
('pytest-shard', '0.1.2'),
]

# (name, version) pairs that are dependencies of the installed software as well.
dependencies = [
('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions
('Python', '3.10.4'),
# protobuf and protobuf-python are pinned to the same version (3.19.4).
('protobuf', '3.19.4'),
('protobuf-python', '3.19.4'),
('pybind11', '2.9.2'),
('SciPy-bundle', '2022.05'),
('PyYAML', '6.0'),
('MPFR', '4.1.0'),
('GMP', '6.2.1'),
('numactl', '2.0.14'),
('FFmpeg', '4.4.2'),
('Pillow', '9.1.1'),
('expecttest', '0.1.3'),
]

# Test names (as understood by PyTorch's test/run_test.py) that are left out of the test run.
# NOTE(review): the dictionary key is presumably an architecture selector, with the
# empty string meaning "applies everywhere" - confirm against the PyTorch easyblock.
excluded_tests = {
'': [
# This test seems to take too long on NVIDIA Ampere at least.
'distributed/test_distributed_spawn',
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
'distributions/test_constraints',
# no xdoctest
'doctests',
# failing on broadwell
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'test_native_mha',
# intermittent failures on various systems
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'distributed/rpc/test_tensorpipe_agent',
]
}

# Shell command for the test step: change into `test/` and run run_test.py with
# unbuffered Python output, passing --continue-through-error and --verbose.
# NOTE(review): %(python)s and %(excluded_tests)s are placeholders that are not filled
# in by this file; presumably the easyblock substitutes the Python command and the
# exclusion arguments derived from `excluded_tests` above - confirm.
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# Additional test script referenced by file name; its content is not part of this file.
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
As we don't set `--save-xml`, pytest is called without arguments, causing it to try to discover ALL tests.
This leads to massive failures in e.g. `test_ops*` where `--use-pytest` is used by the tests.
See https://github.com/pytorch/pytorch/pull/94589

Author: Alexander Grund (TU Dresden)

diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index e32850908d4..e63c6f2a392 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -737,14 +737,16 @@ def run_tests(argv=UNITTEST_ARGS):
failed |= wait_for_process(p) != 0
assert not failed, "Some test shards have failed"
elif USE_PYTEST:
+ pytest_args = argv
if TEST_SAVE_XML:
test_report_path = get_report_path(pytest=True)
print(f'Test results will be stored in {test_report_path}')
+ pytest_args = pytest_args + [f'--junit-xml-reruns={test_report_path}']

import pytest
os.environ["NO_COLOR"] = "1"
os.environ["USING_PYTEST"] = "1"
- exit_code = pytest.main(args=argv + [f'--junit-xml-reruns={test_report_path}'] if TEST_SAVE_XML else [])
+ exit_code = pytest.main(args=pytest_args)
del os.environ["USING_PYTEST"]
if TEST_SAVE_XML:
sanitize_pytest_xml(test_report_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
From 8581301957b0018a32433f85163535709bc9d332 Mon Sep 17 00:00:00 2001
From: Masaki Kozuki <mkozuki@nvidia.com>
Date: Fri, 7 Oct 2022 21:25:07 -0700
Subject: [PATCH] try using a different group name

ref:
https://github.com/pytorch/pytorch/issues/85923#issuecomment-1272220271

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>
---
functorch/test/conftest.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/functorch/test/conftest.py b/functorch/test/conftest.py
index d2e929a9a58db..afc39d9f35de9 100644
--- a/functorch/test/conftest.py
+++ b/functorch/test/conftest.py
@@ -17,7 +17,7 @@


def pytest_addoption(parser: Parser) -> None:
- group = parser.getgroup("terminal reporting")
+ group = parser.getgroup("terminal reporting functorch")
group.addoption(
"--junit-xml-reruns",
action="store",
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
`test_out` may fail due to slightly different values caused by a different order of matrices in SGEMM:

> Mismatched elements: 1 / 50 (2.0%)
> Greatest absolute difference: 1.430511474609375e-05 at index (4, 5) (up to 1e-05 allowed)
> Greatest relative difference: 4.65393206065873e-06 at index (4, 5) (up to 1.3e-06 allowed)

Author: Alexander Grund (TU Dresden)
Updated for PyTorch 1.13.1: Simon Branford (University of Birmingham)

--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -545,6 +545,9 @@
else list(supported_dtypes)[0]
)

+ if dtype is torch.float32:
+ self.precision, self.rel_tol = (1.5e-05, 1e-05)
+
samples = op.sample_inputs(device, dtype)
for sample in samples:
# calls it normally to get the expected result
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Add missing headers to the installation which fixes e.g. test_cpp_extensions_aot_ninja
See https://github.com/pytorch/pytorch/pull/85547

Author: Alexander Grund (TU Dresden)
Updated for PyTorch 1.13.1: Simon Branford (University of Birmingham)

--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -56,7 +56,7 @@
EXCLUDE(ATen_CORE_TEST_SRCS "${ATen_CORE_TEST_SRCS}" ${ATen_CORE_EXCLUDED_TEST_SRCS})
endif()

-file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h")
+file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/vec256/vsx/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h")
file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp")
file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh")
file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
--- a/setup.py
+++ b/setup.py
@@ -1031,6 +1031,7 @@
'include/ATen/*.h',
'include/ATen/cpu/*.h',
'include/ATen/cpu/vec/vec256/*.h',
+ 'include/ATen/cpu/vec/vec256/vsx/*.h',
'include/ATen/cpu/vec/vec512/*.h',
'include/ATen/cpu/vec/*.h',
'include/ATen/core/*.h',
@@ -1138,6 +1139,7 @@
'include/THH/*.cuh',
'include/THH/*.h*',
'include/THH/generic/*.h',
+ 'include/sleef.h',
'share/cmake/ATen/*.cmake',
'share/cmake/Caffe2/*.cmake',
'share/cmake/Caffe2/public/*.cmake',
Loading

0 comments on commit cdc3499

Please sign in to comment.