Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

retry ssh run in e2e tests #2788

Merged
merged 3 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests_e2e/orchestrator/runbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ combinator:
location: $(location)
vm_size: $(vm_size)

concurrency: 32
concurrency: 16

notifier:
- type: agent.junit
Expand Down
18 changes: 18 additions & 0 deletions tests_e2e/tests/lib/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Callable, Any

from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.shell import CommandError


def execute_with_retry(operation: Callable[[], Any]) -> Any:
Expand All @@ -39,3 +40,20 @@ def execute_with_retry(operation: Callable[[], Any]) -> Any:
time.sleep(30)


def retry_ssh_run(operation: Callable[[], Any]) -> Any:
    """
    Executes the given operation (an ssh run command), retrying it a few times if it fails.

    The operation is attempted up to 3 times, waiting 30 seconds between attempts. A CommandError whose exit
    code is not 255 is never retried: ssh reports its own failures (e.g. a connection time out) with exit
    code 255, so any other code comes from the remote command itself and is re-raised immediately.

    Returns whatever the operation returns. If the last attempt also fails, the exception raised by that
    attempt is propagated to the caller instead of being swallowed.
    """
    attempts = 3
    while True:
        attempts -= 1
        try:
            return operation()
        except Exception as e:
            # We raise CommandError on !=0 exit codes in the called method; only exit code 255 is worth retrying
            # Instance of 'Exception' has no 'exit_code' member (no-member) - Disabled: e is actually a CommandError
            if isinstance(e, CommandError) and e.exit_code != 255:  # pylint: disable=no-member
                raise
            # No attempts left: propagate the failure, whatever its type, rather than logging "retrying",
            # sleeping, and silently returning None
            if attempts == 0:
                raise
            log.warning("The operation failed with %s, retrying in 30 secs.", e)
            time.sleep(30)
10 changes: 6 additions & 4 deletions tests_e2e/tests/lib/ssh_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
from pathlib import Path

from tests_e2e.tests.lib import shell
from tests_e2e.tests.lib.retry import retry_ssh_run


class SshClient(object):
def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22):
self._ip_address: str = ip_address
self._username:str = username
self._username: str = username
self._private_key_file: Path = private_key_file
self._port: int = port

Expand All @@ -43,16 +44,17 @@ def run_command(self, command: str, use_sudo: bool = False) -> str:
# Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there.
# Note, too, that when using sudo we need to carry over the value of PATH to the sudo session
sudo = "sudo env PATH=$PATH PYTHONPATH=$PYTHONPATH" if use_sudo else ''
return shell.run_command([
return retry_ssh_run(lambda: shell.run_command([
"ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, destination,
f"source ~/bin/agent-env;{sudo} {command}"])
f"source ~/bin/agent-env;{sudo} {command}"]))

@staticmethod
def generate_ssh_key(private_key_file: Path):
"""
Generates an SSH key on the given Path
"""
shell.run_command(["ssh-keygen", "-m", "PEM", "-t", "rsa", "-b", "4096", "-q", "-N", "", "-f", str(private_key_file)])
shell.run_command(
["ssh-keygen", "-m", "PEM", "-t", "rsa", "-b", "4096", "-q", "-N", "", "-f", str(private_key_file)])

def get_architecture(self):
    """
    Runs "uname -m" through run_command() and returns its output with trailing whitespace removed.
    """
    uname_output = self.run_command("uname -m")
    return uname_output.rstrip()
Expand Down