-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Core] Install SkyPilot runtime in separate env #2801
Changes from all commits
9bcabb4
57fd5be
7824745
c8a7052
7ddcf75
22d9860
f3e6df0
6c6ea73
ed9866e
6491629
01eea55
6753b2d
db60092
5fb31e0
952bc80
b39fa19
27dc07b
97ad588
dbcfff5
cc267c0
c353d3b
465cd77
a280503
bedd6e4
141ac76
cf7c315
e98788f
06c325a
fe9b5da
71d9594
841f1ff
ba38eea
93afbc6
ae84211
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,12 +13,13 @@ def restart_skylet(): | |
# TODO(zhwu): make the killing graceful, e.g., use a signal to tell | ||
# skylet to exit, instead of directly killing it. | ||
subprocess.run( | ||
'ps aux | grep "sky.skylet.skylet" | grep "python3 -m"' | ||
'ps aux | grep "sky.skylet.skylet" | grep "python -m"' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious, is there any reason to make this change? |
||
'| awk \'{print $2}\' | xargs kill >> ~/.sky/skylet.log 2>&1', | ||
shell=True, | ||
check=False) | ||
subprocess.run( | ||
'nohup python3 -m sky.skylet.skylet' | ||
f'{constants.ACTIVATE_PYTHON_ENV} ' | ||
f'nohup python -m sky.skylet.skylet' | ||
' >> ~/.sky/skylet.log 2>&1 &', | ||
shell=True, | ||
check=True) | ||
|
@@ -27,7 +28,7 @@ def restart_skylet(): | |
|
||
|
||
proc = subprocess.run( | ||
'ps aux | grep -v "grep" | grep "sky.skylet.skylet" | grep "python3 -m"', | ||
'ps aux | grep -v "grep" | grep "sky.skylet.skylet" | grep "python -m"', | ||
shell=True, | ||
check=False) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
|
||
from sky import sky_logging | ||
from sky.skylet import configs | ||
from sky.skylet import constants | ||
from sky.utils import common_utils | ||
|
||
logger = sky_logging.init_logger(__name__) | ||
|
@@ -121,4 +122,5 @@ def is_autostopping(cls) -> str: | |
def _build(cls, code: List[str]) -> str: | ||
code = cls._PREFIX + code | ||
code = ';'.join(code) | ||
return f'python3 -u -c {shlex.quote(code)}' | ||
return (f'{constants.ACTIVATE_PYTHON_ENV} ' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually, why not use |
||
f'python -u -c {shlex.quote(code)}') |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,14 @@ | |
SKY_REMOTE_RAY_PORT_FILE = '~/.sky/ray_port.json' | ||
SKY_REMOTE_RAY_TEMPDIR = '/tmp/ray_skypilot' | ||
SKY_REMOTE_RAY_VERSION = '2.9.3' | ||
SKY_REMOTE_WHEEL_PATH = '~/.sky/wheels' | ||
SKY_REMOTE_PYTHON_ENV_NAME = 'skypilot-runtime' | ||
SKY_REMOTE_PYTHON_ENV = f'~/{SKY_REMOTE_PYTHON_ENV_NAME}' | ||
|
||
# TODO(mluo): Make explicit `sky launch -c <name> ''` optional. | ||
UNINITIALIZED_ONPREM_CLUSTER_MESSAGE = ( | ||
'Found uninitialized local cluster {cluster}. Run this ' | ||
'command to initialize it locally: sky launch -c {cluster} \'\'') | ||
Comment on lines
+29
to
+32
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIUC we already deprecated the local clusters? Should we remove it? |
||
|
||
# The name for the environment variable that stores the unique ID of the | ||
# current task. This will stay the same across multiple recoveries of the | ||
|
@@ -66,19 +74,38 @@ | |
DOCKER_SERVER_ENV_VAR, | ||
} | ||
|
||
ACTIVATE_PYTHON_ENV = (f'[ -d {SKY_REMOTE_PYTHON_ENV} ] && ' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In what case will the remote python env not exist? Should we print some warning here? |
||
f'source {SKY_REMOTE_PYTHON_ENV}/bin/activate;') | ||
|
||
|
||
def run_in_python_env(command): | ||
"""Returns a command that runs in the SkyPilot's python environment.""" | ||
return f'( {ACTIVATE_PYTHON_ENV}{command}; deactivate )' | ||
|
||
|
||
# Install conda on the remote cluster if it is not already installed. | ||
# We use conda with python 3.10 to be consistent across multiple clouds with | ||
# best effort. | ||
# https://github.com/ray-project/ray/issues/31606 | ||
# We use python 3.10 to be consistent with the python version of the | ||
# AWS's Deep Learning AMI's default conda environment. | ||
_RUN_PYTHON = run_in_python_env('python \\$@') | ||
_RUN_PIP = run_in_python_env('pip \\$@') | ||
_RUN_RAY = run_in_python_env('ray \\$@') | ||
CONDA_INSTALLATION_COMMANDS = ( | ||
'which conda > /dev/null 2>&1 || ' | ||
'(wget -nc https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -O Miniconda3-Linux-x86_64.sh && ' # pylint: disable=line-too-long | ||
'{ wget -nc https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -O Miniconda3-Linux-x86_64.sh && ' # pylint: disable=line-too-long | ||
'bash Miniconda3-Linux-x86_64.sh -b && ' | ||
'eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && ' | ||
'conda config --set auto_activate_base true); ' | ||
'grep "# >>> conda initialize >>>" ~/.bashrc || conda init;') | ||
'conda config --set auto_activate_base true && source ~/.bashrc; }; ' | ||
'grep "# >>> conda initialize >>>" ~/.bashrc || conda init;' | ||
# Create a separate conda environment for SkyPilot dependencies. | ||
f'[ -d {SKY_REMOTE_PYTHON_ENV} ] || ' | ||
f'python -m venv {SKY_REMOTE_PYTHON_ENV}; ' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have we considered using a conda env instead? What are the pros & cons of the two implementations? |
||
f'source {SKY_REMOTE_PYTHON_ENV}/bin/activate; ' | ||
f'echo "function skypy () {{ {_RUN_PYTHON} }}" >> ~/.bashrc;' | ||
f'echo "function skypip () {{ {_RUN_PIP} }}" >> ~/.bashrc;' | ||
f'echo "function skyray () {{ {_RUN_RAY} }}" >> ~/.bashrc;') | ||
|
||
_sky_version = str(version.parse(sky.__version__)) | ||
RAY_STATUS = f'RAY_ADDRESS=127.0.0.1:{SKY_REMOTE_RAY_PORT} ray status' | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should add this for newly-added cloud as well, e.g. runpod |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we put this variable into
controller_utils.shared_controller_vars_to_fill
since it is both used by serve & spot controller?