Skip to content

Commit 0d86924

Browse files
authored
Remove tracebacks for exceptions to improve UX (#441)
* Remove tracebacks
* Fix job fail color
* fix comments
* Hide tracebacks
* Fix #442
* fix `workdir` becomes `~/sky_workdir/workdir` #442
* add logging error for job_id problem
* format
* update error message for retry
* Update docs
* Fix login
* Add more checks
* format
* fix return type
* format
* refactor returncode handling
* Update return handling
* Fix filemount testing
1 parent 93e036e commit 0d86924

File tree

5 files changed

+176
-110
lines changed

5 files changed

+176
-110
lines changed

examples/using_file_mounts.yaml

+4-4
Original file line number | Diff line number | Diff line change
@@ -46,11 +46,11 @@ file_mounts:
4646
#
4747
# After syncing the target will be:
4848
#
49-
# /tmp/data/
49+
# /tmp/workdir/
5050
# a/
5151
# b
5252
# c
53-
/tmp/data: ~/tmp-workdir
53+
/tmp/workdir: ~/tmp-workdir
5454

5555
# Relative paths are under ~/ (after sync, ~/relative_dir/ exists).
5656
relative_dir: ~/tmp-workdir
@@ -82,8 +82,8 @@ run: |
8282
touch /data/logs/test.log
8383
touch /data/checkpoints/last.pt
8484
85-
echo hi >> /tmp/data/new_file
86-
# tree /tmp/data
85+
echo hi >> /tmp/workdir/new_file
86+
# tree /tmp/workdir
8787
8888
ls -lthr ~/.ssh
8989

sky/backends/backend_utils.py

+35-6
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
"""Util constants/functions for the backends."""
2+
import colorama
23
import datetime
34
import enum
45
import getpass
56
import os
67
import pathlib
78
import shlex
89
import subprocess
10+
import sys
911
import textwrap
1012
import time
1113
from typing import Dict, List, Optional, Tuple, Union
@@ -598,12 +600,12 @@ def run_command_on_ip_via_ssh(
598600
ssh_private_key: str,
599601
port_forward: Optional[List[int]] = None,
600602
# Advanced options.
603+
require_outputs: bool = False,
601604
log_path: str = '/dev/null',
602605
stream_logs: bool = True,
603-
check: bool = False,
604606
ssh_mode: SshMode = SshMode.NON_INTERACTIVE,
605607
ssh_control_name: Optional[str] = None,
606-
) -> Tuple[subprocess.Popen, str, str]:
608+
) -> Union[int, Tuple[int, str, str]]:
607609
"""Uses 'ssh' to run 'cmd' on a node with ip.
608610
609611
Args:
@@ -616,6 +618,7 @@ def run_command_on_ip_via_ssh(
616618
617619
Advanced options:
618620
621+
require_outputs: Whether to return the stdout/stderr of the command.
619622
log_path: Redirect stdout/stderr to the log_path.
620623
stream_logs: Stream logs to the stdout/stderr.
621624
check: Check the success of the command.
@@ -625,7 +628,9 @@ def run_command_on_ip_via_ssh(
625628
for optimizing the ssh speed.
626629
627630
Returns:
628-
A tuple of (process, stdout, stderr).
631+
returncode
632+
or
633+
A tuple of (returncode, stdout, stderr).
629634
"""
630635
base_ssh_command = _ssh_base_command(ip,
631636
ssh_private_key,
@@ -636,8 +641,8 @@ def run_command_on_ip_via_ssh(
636641
if ssh_mode == SshMode.LOGIN:
637642
assert isinstance(cmd, list), 'cmd must be a list for login mode.'
638643
command = base_ssh_command + cmd
639-
proc = run(command, shell=False, check=check)
640-
return proc, '', ''
644+
proc = run(command, shell=False, check=False)
645+
return proc.returncode, '', ''
641646
if isinstance(cmd, list):
642647
cmd = ' '.join(cmd)
643648
# We need this to correctly run the cmd, and get the output.
@@ -652,7 +657,31 @@ def run_command_on_ip_via_ssh(
652657
shlex.quote(f'true && source ~/.bashrc && export OMP_NUM_THREADS=1 '
653658
f'PYTHONWARNINGS=ignore && ({cmd})'),
654659
]
655-
return log_lib.run_with_log(command, log_path, stream_logs, check=check)
660+
return log_lib.run_with_log(command,
661+
log_path,
662+
stream_logs,
663+
require_outputs=require_outputs)
664+
665+
666+
def handle_returncode(returncode: int,
667+
command: str,
668+
error_msg: str,
669+
stderr: Optional[str] = None) -> None:
670+
"""Handle the returncode of a command.
671+
672+
Args:
673+
returncode: The returncode of the command.
674+
command: The command that was run.
675+
error_msg: The error message to print.
676+
stderr: The stderr of the command.
677+
"""
678+
if returncode != 0:
679+
if stderr is not None:
680+
logger.error(stderr)
681+
logger.error(f'Command failed with code {returncode}: {command}')
682+
logger.error(
683+
f'{colorama.Fore.RED}{error_msg}{colorama.Style.RESET_ALL}')
684+
sys.exit(returncode)
656685

657686

658687
def run(cmd, **kwargs):

0 commit comments

Comments
 (0)