-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Core] Fix status refresh for INIT cluster #2491
Changes from 3 commits
e17c595
d7463c6
892ffc9
8495134
15cdcc9
6f75c06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2014,14 +2014,24 @@ def run_ray_status_to_check_ray_cluster_healthy() -> bool: | |
# triggered. | ||
if external_ips is None or len(external_ips) == 0: | ||
logger.debug(f'Refreshing status ({cluster_name!r}): No cached ' | ||
f'IPs found. External IPs: {external_ips}') | ||
f'IPs found. Handle: {handle}') | ||
raise exceptions.FetchIPError( | ||
reason=exceptions.FetchIPError.Reason.HEAD) | ||
|
||
if handle.head_ssh_port is None: | ||
# Refresh the ssh ports. It is ok to refresh as it is fast. | ||
handle.external_ssh_ports() | ||
if handle.head_ssh_port is None: | ||
logger.debug( | ||
f'Refreshing status ({cluster_name!r}): failed ' | ||
f'to get the ssh ports. Handle: {handle}') | ||
raise exceptions.FetchIPError( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shall we use a new exception named There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good! |
||
reason=exceptions.FetchIPError.Reason.HEAD) | ||
|
||
# Check if ray cluster status is healthy. | ||
ssh_credentials = ssh_credential_from_yaml(handle.cluster_yaml, | ||
handle.docker_user) | ||
assert handle.head_ssh_port is not None, handle | ||
|
||
runner = command_runner.SSHCommandRunner(external_ips[0], | ||
**ssh_credentials, | ||
port=handle.head_ssh_port) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall we use
update_ssh_ports
for better readability?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point! Actually, we don't need to handle this
None
or refresh case, as we already have it in theexternal_ssh_ports
call. changed to that instead.