Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check SSH connectivity during end-to-end tests #2970

Merged
merged 2 commits into from
Nov 3, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import datetime
import json
import logging
import time
import traceback
import uuid

Expand Down Expand Up @@ -58,7 +59,7 @@
from tests_e2e.tests.lib.logging import log, set_thread_name, set_current_thread_log
from tests_e2e.tests.lib.agent_log import AgentLogRecord
from tests_e2e.tests.lib.resource_group_client import ResourceGroupClient
from tests_e2e.tests.lib.shell import run_command
from tests_e2e.tests.lib.shell import run_command, CommandError
from tests_e2e.tests.lib.ssh_client import SshClient


Expand Down Expand Up @@ -398,6 +399,8 @@ def _setup_test_nodes(self) -> None:

ssh_client = SshClient(ip_address=node.ip_address, username=self._user, identity_file=Path(self._identity_file))

self._check_ssh_connectivity(ssh_client)

#
# Cleanup the test node (useful for developer runs)
#
Expand Down Expand Up @@ -447,6 +450,26 @@ def _setup_test_nodes(self) -> None:

log.info("Completed test node setup")

@staticmethod
def _check_ssh_connectivity(ssh_client: SshClient, *, max_attempts: int = 5, delay_seconds: float = 15) -> None:
    """
    Verifies that the test node accepts SSH connections, retrying while it is still booting.

    We may be trying to connect to the test node while it is still booting. This method executes a trivial
    command over SSH and, if the node rejects the login because boot has not completed, waits and tries again.

    :param ssh_client: client used to execute the probe command on the test node.
    :param max_attempts: total number of attempts before giving up; must be at least 1.
    :param delay_seconds: time to wait between consecutive attempts.
    :raises ValueError: if max_attempts is less than 1.
    :raises CommandError: if the probe command fails for any reason other than the "still booting" condition.
    :raises Exception: if the node is still refusing logins after max_attempts attempts.
    """
    if max_attempts < 1:
        # With zero attempts the loop below would be skipped and the check would "pass" without ever running.
        raise ValueError(f"max_attempts must be at least 1, got {max_attempts}")

    for attempt in range(max_attempts):
        log.info("Checking SSH connectivity to the test node...")
        try:
            ssh_client.run_command("echo 'SSH connectivity check'")
        except CommandError as error:
            # Only the boot-time rejection is retried; it looks like:
            # "System is booting up. Unprivileged users are not permitted to log in yet. Please come back later. For technical details, see pam_nologin(8)."
            if "Unprivileged users are not permitted to log in yet" not in error.stderr:
                raise
            # Last attempt: give up, keeping the underlying command failure as the explicit cause.
            if attempt >= max_attempts - 1:
                raise Exception(f"SSH connectivity check failed after {max_attempts} attempts, giving up [{error}]") from error
            log.info("SSH is not ready [%s], will retry after a short delay.", error)
            time.sleep(delay_seconds)
        else:
            log.info("SSH is ready.")
            return

def _collect_logs_from_test_nodes(self) -> None:
"""
Collects the test logs from the test nodes and copies them to the local machine
Expand Down
Loading