Skip to content

Commit

Permalink
ssh retry
Browse files Browse the repository at this point in the history
  • Loading branch information
nagworld9 committed Mar 20, 2023
1 parent 8ebaf41 commit 660454d
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
2 changes: 1 addition & 1 deletion tests_e2e/orchestrator/runbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ combinator:
location: $(location)
vm_size: $(vm_size)

concurrency: 32
concurrency: 16

notifier:
- type: agent.junit
Expand Down
18 changes: 18 additions & 0 deletions tests_e2e/tests/lib/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Callable, Any

from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.shell import CommandError


def execute_with_retry(operation: Callable[[], Any]) -> Any:
Expand All @@ -39,3 +40,20 @@ def execute_with_retry(operation: Callable[[], Any]) -> Any:
time.sleep(30)


def retry_ssh_run(operation: Callable[[], Any], attempts: int = 3, attempt_delay: int = 30) -> Any:
    """
    Executes the given operation (expected to run a command over ssh) and returns its result, retrying
    when the operation fails with a CommandError whose exit code is 255.

    An exit code of 255 is treated as an SSH connection failure (e.g. connection time out); any other
    exit code is treated as the result of the remote command and is raised immediately.

    Exceptions other than CommandError are not connection failures and are propagated to the caller
    without retrying (previously they were retried and, once the attempts were exhausted, silently
    swallowed, making the function return None).

    :param operation: callable taking no arguments; invoked once per attempt.
    :param attempts: maximum number of times the operation is invoked; must be at least 1.
    :param attempt_delay: seconds to wait between consecutive attempts.
    :return: whatever the operation returns on its first successful invocation.
    :raises ValueError: if attempts is less than 1.
    :raises CommandError: if the exit code is not 255, or if the last attempt also fails.
    """
    if attempts < 1:
        raise ValueError(f"attempts must be at least 1 (got {attempts})")

    attempt = 0
    while True:
        attempt += 1
        try:
            return operation()
        except CommandError as e:
            # We raise CommandError on !=0 exit codes in the downstream; only 255 is worth retrying,
            # and the last attempt must surface the error to the caller.
            if e.exit_code != 255 or attempt >= attempts:
                raise
            log.warning("The operation failed with SSH Connection time out, retrying in %s secs. Error: %s", attempt_delay, e)
        # Only reached after a retryable failure; successful calls return and fatal ones raise above.
        time.sleep(attempt_delay)
13 changes: 8 additions & 5 deletions tests_e2e/tests/lib/ssh_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
from pathlib import Path

from tests_e2e.tests.lib import shell
from tests_e2e.tests.lib.retry import retry_ssh_run


class SshClient(object):
def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22):
self._ip_address: str = ip_address
self._username:str = username
self._username: str = username
self._private_key_file: Path = private_key_file
self._port: int = port

Expand All @@ -38,21 +39,23 @@ def run_command(self, command: str, use_sudo: bool = False) -> str:
# Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there.
# Note, too, that when using sudo we need to carry over the value of PATH to the sudo session
sudo = "sudo env PATH=$PATH" if use_sudo else ''
return shell.run_command([
return retry_ssh_run(lambda: shell.run_command([
"ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, destination,
f"PATH=~/bin:$PATH;{sudo} {command}"])
f"PATH=~/bin:$PATH;{sudo} {command}"]))

@staticmethod
def generate_ssh_key(private_key_file: Path):
"""
Generates an SSH key on the given Path
"""
shell.run_command(["ssh-keygen", "-m", "PEM", "-t", "rsa", "-b", "4096", "-q", "-N", "", "-f", str(private_key_file)])
shell.run_command(
["ssh-keygen", "-m", "PEM", "-t", "rsa", "-b", "4096", "-q", "-N", "", "-f", str(private_key_file)])

def get_architecture(self):
    """
    Runs "uname -m" through run_command and returns its output with trailing whitespace removed
    """
    uname_output = self.run_command("uname -m")
    return uname_output.rstrip()

def copy(self, source: Path, target: Path, remote_source: bool = False, remote_target: bool = False, recursive: bool = False):
def copy(self, source: Path, target: Path, remote_source: bool = False, remote_target: bool = False,
recursive: bool = False):
"""
Copy file from local to remote machine
"""
Expand Down

0 comments on commit 660454d

Please sign in to comment.