Skip to content

Commit

Permalink
Re-raise P2PConsistencyError from failed P2P tasks. (#8748)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hendrikmakait authored Jul 15, 2024
1 parent 48eefee commit 110eac1
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
4 changes: 4 additions & 0 deletions distributed/shuffle/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ def handle_transfer_errors(id: ShuffleId) -> Iterator[None]:
yield
except ShuffleClosedError:
raise Reschedule()
except P2PConsistencyError:
raise
except Exception as e:
raise RuntimeError(f"P2P shuffling {id} failed during transfer phase") from e

Expand All @@ -518,6 +520,8 @@ def handle_unpack_errors(id: ShuffleId) -> Iterator[None]:
raise e
except ShuffleClosedError:
raise Reschedule()
except P2PConsistencyError:
raise
except Exception as e:
raise RuntimeError(f"P2P shuffling {id} failed during unpack phase") from e

Expand Down
4 changes: 3 additions & 1 deletion distributed/shuffle/_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
handle_transfer_errors,
handle_unpack_errors,
)
from distributed.shuffle._exceptions import DataUnavailable
from distributed.shuffle._exceptions import DataUnavailable, P2PConsistencyError
from distributed.shuffle._limiter import ResourceLimiter
from distributed.shuffle._worker_plugin import ShuffleWorkerPlugin
from distributed.sizeof import sizeof
Expand Down Expand Up @@ -105,6 +105,8 @@ def shuffle_barrier(id: ShuffleId, run_ids: list[int]) -> int:
return get_worker_plugin().barrier(id, run_ids)
except Reschedule as e:
raise e
except P2PConsistencyError:
raise
except Exception as e:
raise RuntimeError(f"shuffle_barrier failed during shuffle {id}") from e

Expand Down
4 changes: 1 addition & 3 deletions distributed/shuffle/tests/test_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,7 @@ async def test_shuffle_with_array_conversion(c, s, a, b, npartitions):

if npartitions == 1:
# FIXME: distributed#7816
with raises_with_cause(
RuntimeError, "failed during transfer", RuntimeError, "Barrier task"
):
with pytest.raises(P2PConsistencyError, match="Barrier task"):
await c.compute(out)
else:
await c.compute(out)
Expand Down

0 comments on commit 110eac1

Please sign in to comment.