add/fix patches for PyTorch 1.13.1 w/ foss/2022a #18371

Merged
easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a.eb (26 changes: 13 additions & 13 deletions)

@@ -12,31 +12,30 @@ sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.10.0_fix-kineto-crash.patch',
- 'PyTorch-1.11.0_fix-fsdp-fp16-test.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-skip-decorators.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
- 'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
'PyTorch-1.12.1_fix-vsx-loadu.patch',
+ 'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
+ 'PyTorch-1.13.1_fix-fsdp-fp16-test.patch',
'PyTorch-1.13.1_fix-pytest-args.patch',
'PyTorch-1.13.1_fix-test-ops-conf.patch',
- 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
- 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
- 'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch',
'PyTorch-1.13.1_increase-tolerance-test_ops.patch',
'PyTorch-1.13.1_install-vsx-vec-headers.patch',
+ 'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
+ 'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
'PyTorch-1.13.1_skip-failing-grad-test.patch',
+ 'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
]
checksums = [
{'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
- {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
@@ -49,21 +48,22 @@ checksums = [
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
'0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
- {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
{'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
+ {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
+ {'PyTorch-1.13.1_fix-fsdp-fp16-test.patch': '8ae68e60d6e1f92f50322b7f0381c7e65251fba32d7606e3a238a36a2f55b5cf'},
{'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'},
{'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'},
- {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
- {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
- 'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
- {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch':
- '92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'},
{'PyTorch-1.13.1_increase-tolerance-test_ops.patch':
- 'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'},
+ 'c909fdfc2b12df457e1eb5514265ffec3eab653994949416f3f048668421e223'},
{'PyTorch-1.13.1_install-vsx-vec-headers.patch':
'7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'},
+ {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
+ {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
+ 'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
{'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'},
+ {'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
+ '481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]
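For anyone refreshing these hashes by hand: EasyBuild can fill them in automatically (eb --inject-checksums), but a minimal Python sketch, assuming the patch file sits in the current directory, looks like this:

# Minimal sketch: compute the SHA-256 digest that goes into 'checksums'.
# Assumes the patch file is in the current working directory; EasyBuild
# can also inject these for you via 'eb --inject-checksums'.
import hashlib

def sha256_of(path):
    with open(path, 'rb') as f:
        return hashlib.sha256(f.read()).hexdigest()

print(sha256_of('PyTorch-1.13.1_skip-tests-without-fbgemm.patch'))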
easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_fix-fsdp-fp16-test.patch (new file)

@@ -0,0 +1,21 @@
The test fails on a node with more than 5 V100 GPUs or more than 4 A100 GPUs.
Hence limit the world_size to 4
See https://github.com/pytorch/pytorch/pull/86280

Author: Alexander Grund (TU Dresden)

diff --git a/test/distributed/fsdp/test_fsdp_pure_fp16.py b/test/distributed/fsdp/test_fsdp_pure_fp16.py
index 1c663f8263354..e0033ef3d4b72 100644
--- a/test/distributed/fsdp/test_fsdp_pure_fp16.py
+++ b/test/distributed/fsdp/test_fsdp_pure_fp16.py
@@ -34,8 +34,8 @@
class TestPureFP16(FSDPTest):
@property
def world_size(self):
- # Test fails due to inaccuracies when using more than 5 GPUs
- return min(5, super().world_size)
+ # Test fails due to inaccuracies when using more than 4 GPUs
+ return min(4, super().world_size)

@skip_if_lt_x_gpu(2)
@parametrize(
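For context, FSDPTest sizes its process group from the GPUs it can see, so capping the world_size property caps the number of ranks spawned. A standalone illustration of the same pattern (capped_world_size is a hypothetical helper, not part of the patch):

# Illustration only: the patched property amounts to capping the rank count
# at 4. Assumes torch is importable; returns 0 when no GPU is visible.
import torch

def capped_world_size(cap=4):
    return min(cap, torch.cuda.device_count())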
easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_increase-tolerance-test_ops.patch

@@ -4,18 +4,24 @@
> Greatest absolute difference: 1.430511474609375e-05 at index (4, 5) (up to 1e-05 allowed)
> Greatest relative difference: 4.65393206065873e-06 at index (4, 5) (up to 1.3e-06 allowed)

See https://github.com/pytorch/pytorch/pull/86365

Author: Alexander Grund (TU Dresden)
Updated for PyTorch 1.13.1: Simon Branford (University of Birmingham)

--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -545,6 +545,9 @@
else list(supported_dtypes)[0]
)

+ if dtype is torch.float32:
+ self.precision, self.rel_tol = (1.5e-05, 1e-05)
+
samples = op.sample_inputs(device, dtype)
for sample in samples:
# calls it normally to get the expected result
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index 4b2d0ebabc46b..bab7843a72b74 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -8503,7 +8503,11 @@ op_db: List[OpInfo] = [
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1.3e-05, rtol=1.3e-05),
torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
- 'TestCommon', 'test_numpy_refs')],
+ 'TestCommon', 'test_numpy_refs'),
+ DecorateInfo(
+ toleranceOverride({torch.float32: tol(atol=1.5e-05, rtol=1e-05)}),
+ 'TestCommon', 'test_out'),
+ ],
skips=(
# NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes', device_type='cuda', active_if=not SM53OrLater),
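To see why the relaxed tolerances are enough: PyTorch's closeness checks accept a result when abs(actual - expected) <= atol + rtol * abs(expected). A self-contained check with a value chosen to mimic the reported difference:

# The mismatch quoted above is ~1.43e-05 in absolute terms on a float32
# result. The default float32 tolerances (atol=1e-05, rtol=1.3e-06, as in
# the quoted failure) reject it; the pair from the patch accepts it.
import torch

actual = torch.tensor([1.0000143], dtype=torch.float32)
expected = torch.tensor([1.0], dtype=torch.float32)
torch.testing.assert_close(actual, expected, atol=1.5e-05, rtol=1e-05)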

This file was deleted: easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch (replaced by PyTorch-1.13.1_skip-tests-without-fbgemm.patch, shown below).

easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-tests-without-fbgemm.patch (new file)

@@ -0,0 +1,68 @@
Those tests (from test_ao_sparsity & test_quantization) require FBGEMM which may not be available.
So add the skip decorator.
See https://github.com/pytorch/pytorch/issues/87364

Author: Alexander Grund (TU Dresden)
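
The diff below imports skipIfNoFBGEMM from torch.testing._internal.common_quantization. Roughly, such a guard boils down to the following sketch (illustrative only; skip_if_no_fbgemm is a hypothetical stand-in, and the real decorator also handles test classes and carries a fuller skip message):

# Sketch of a skip-without-FBGEMM guard: skip when the quantized backend
# does not list the 'fbgemm' engine on this machine.
import unittest
import torch

def skip_if_no_fbgemm(test_item):
    if 'fbgemm' not in torch.backends.quantized.supported_engines:
        return unittest.skip("Quantized operations require FBGEMM")(test_item)
    return test_item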

diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py
index 6a1b6067a4c..0c43f585af2 100644
--- a/test/ao/sparsity/test_composability.py
+++ b/test/ao/sparsity/test_composability.py
@@ -9,6 +9,7 @@ import torch.ao.quantization as tq
from torch import nn
from torch.ao import sparsity
from torch.testing._internal.common_utils import TestCase
+from torch.testing._internal.common_quantization import skipIfNoFBGEMM
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx, convert_to_reference_fx, prepare_qat_fx
from torch.ao.sparsity import fqn_to_module

@@ -62,6 +63,7 @@ def _calculate_sparsity(tensor):
# This series of tests are to check the composability goals for sparsity and quantization. Namely
# that performing quantization and sparsity model manipulations in various orderings
# does not cause problems
+@skipIfNoFBGEMM
class TestComposability(TestCase):
# This test checks whether performing quantization prepare before sparse prepare
# causes any issues and verifies that the correct observers are inserted and that
@@ -326,6 +328,7 @@ class TestFxComposability(TestCase):
r"""This series of tests checks that various steps of the quantization and sparsity flow
compose cleanly despite variation in sequencing.
"""
+ @skipIfNoFBGEMM
def test_q_prep_fx_before_s_prep(self):
r"""
This test checks that the ordering of prepare_fx -> sparse prepare -> convert_fx
@@ -445,6 +448,7 @@ class TestFxComposability(TestCase):
)
self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])

+ @skipIfNoFBGEMM
def test_s_prep_before_q_prep_fx(self):
r"""
This test checks that the ordering of sparse prepare -> prepare_fx -> convert_fx
@@ -490,6 +494,7 @@ class TestFxComposability(TestCase):
)
self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])

+ @skipIfNoFBGEMM
def test_s_prep_before_qat_prep_fx(self):
r"""
This test checks that the ordering of sparse prepare -> prepare_qat_fx -> convert_fx
diff --git a/test/quantization/core/test_docs.py b/test/quantization/core/test_docs.py
index 27842b46ce7..8e50ffa3166 100644
--- a/test/quantization/core/test_docs.py
+++ b/test/quantization/core/test_docs.py
@@ -10,11 +10,13 @@ import torch
from torch.testing._internal.common_quantization import (
QuantizationTestCase,
SingleLayerLinearModel,
+ skipIfNoFBGEMM,
)
from torch.testing._internal.common_quantized import override_quantized_engine
from torch.testing._internal.common_utils import IS_ARM64


+@skipIfNoFBGEMM
class TestQuantizationDocs(QuantizationTestCase):
r"""
The tests in this section import code from the quantization docs and check that