Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test for FIPS #2842

Merged
merged 8 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests_e2e/orchestrator/runbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ variable:
#
# The test suites to execute
- name: test_suites
value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned"
value: "agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, fips"
- name: cloud
value: "AzureCloud"
is_case_visible: true
Expand Down
2 changes: 1 addition & 1 deletion tests_e2e/pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ parameters:
- name: test_suites
displayName: Test Suites
type: string
default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update
default: agent_bvt, no_outbound_connections, extensions_disabled, agent_not_provisioned, agent_update, fips
# NOTES:
# * 'image', 'location' and 'vm_size' override any values in the test suites/images definition
# files. Those parameters are useful for 1-off tests, like testing a VHD or checking if
Expand Down
6 changes: 3 additions & 3 deletions tests_e2e/test_suites/agent_bvt.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: "AgentBvt"
tests:
- "bvts/extension_operations.py"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed the directory to make it consistent with other tests: the directory name is the same as the test suite's.

- "bvts/run_command.py"
- "bvts/vm_access.py"
- "agent_bvt/extension_operations.py"
- "agent_bvt/run_command.py"
- "agent_bvt/vm_access.py"
images:
- "endorsed"
- "endorsed-arm64"
2 changes: 1 addition & 1 deletion tests_e2e/test_suites/extensions_disabled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
#
name: "ExtensionsDisabled"
tests:
- "extensions_disabled.py"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved the test to its own directory for consistency

- "extensions_disabled/extensions_disabled.py"
images: "random(endorsed)"
owns_vm: true
10 changes: 10 additions & 0 deletions tests_e2e/test_suites/fips.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# FIPS should not affect extension processing. The test enables FIPS and then executes an extension.
#
# NOTE: Enabling FIPS is very specific to the distro. This test is only executed on RHEL 9.0.
#
name: "FIPS"
tests:
- source: "fips/fips.py"
images: "rhel_90"
owns_vm: true
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def run(self):
log.info("Installing %s", custom_script_2_0)
message = f"Hello {uuid.uuid4()}!"
custom_script_2_0.enable(
settings={
protected_settings={
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using protected settings also exercises handling of the tenant certificate.

'commandToExecute': f"echo \'{message}\'"
},
auto_upgrade_minor_version=False
Expand All @@ -77,7 +77,7 @@ def run(self):

message = f"Hello {uuid.uuid4()}!"
custom_script_2_1.enable(
settings={
protected_settings={
'commandToExecute': f"echo \'{message}\'"
}
)
Expand Down
Empty file removed tests_e2e/tests/bvts/__init__.py
Empty file.
88 changes: 88 additions & 0 deletions tests_e2e/tests/fips/fips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python3

# Microsoft Azure Linux Agent
#
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import uuid
from assertpy import fail
from typing import Any, Dict, List

from tests_e2e.tests.lib.agent_test import AgentTest
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.shell import CommandError
from tests_e2e.tests.lib.ssh_client import SshClient
from tests_e2e.tests.lib.virtual_machine_client import VirtualMachineClient
from tests_e2e.tests.lib.virtual_machine_extension_client import VirtualMachineExtensionClient
from tests_e2e.tests.lib.identifiers import VmExtensionIds


class Fips(AgentTest):
    """
    Enables FIPS on the test VM, which is a RHEL 9 VM (see https://access.redhat.com/solutions/137833#rhel9), then executes the CustomScript extension.

    TODO: Investigate whether extensions with protected settings are supported on FIPS-enabled systems. The Agent has issues handling the tenant
          certificate on those systems (additional configuration on FIPS may be needed).
    """
    def run(self):
        """
        Executes the test in four steps: enable FIPS over SSH, restart the VM and wait for it to boot, verify that
        FIPS is reported as enabled, and finally enable CustomScript and check its instance view.

        Raises an Exception (chained to the original CommandError) if one of the fips-mode-setup commands fails, and
        an assertion failure (via assertpy's fail) if the FIPS check does not report the expected status.
        """
        ssh_client: SshClient = self._context.create_ssh_client()

        try:
            command = "fips-mode-setup --enable"
            log.info("Enabling FIPS on the test VM [%s]", command)
            output = ssh_client.run_command(command, use_sudo=True)
            log.info("Enable FIPS completed\n%s", output)
        except CommandError as e:
            # Chain the original error so that the command's details are not lost in the traceback
            raise Exception(f"Failed to enable FIPS: {e}") from e

        # The test reboots the VM before checking the FIPS status
        log.info("Restarting test VM")
        vm: VirtualMachineClient = VirtualMachineClient(self._context.vm)
        vm.restart(wait_for_boot=True, ssh_client=ssh_client)

        try:
            command = "fips-mode-setup --check"
            log.info("Verifying that FIPS is enabled [%s]", command)
            output = ssh_client.run_command(command).rstrip()
            if output != "FIPS mode is enabled.":
                fail(f"FIPS is not enabled - '{command}' returned '{output}'")
            log.info(output)
        except CommandError as e:
            raise Exception(f"Failed to verify that FIPS is enabled: {e}") from e

        custom_script = VirtualMachineExtensionClient(self._context.vm, VmExtensionIds.CustomScript, resource_name="CustomScript")

        log.info("Installing %s", custom_script)
        # A unique message lets the instance view check confirm that this specific invocation was the one executed
        message = f"Hello {uuid.uuid4()}!"
        custom_script.enable(
            # Public (not protected) settings are used here; see the TODO in the class docstring
            settings={
                'commandToExecute': f"echo \'{message}\'"
            },
            auto_upgrade_minor_version=False
        )
        custom_script.assert_instance_view(expected_version="2.0", expected_message=message)

    def get_ignore_error_rules(self) -> List[Dict[str, Any]]:
        """
        Returns the rules for errors that should be ignored for this test.

        Some extensions added by policy on the test subscription use protected settings, which produce this error.
        """
        return [
            {'message': r'Failed to decrypt /var/lib/waagent/Certificates.p7m'}
        ]


if __name__ == "__main__":
Fips.run_from_command_line()

14 changes: 6 additions & 8 deletions tests_e2e/tests/lib/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,22 @@ def execute_with_retry(operation: Callable[[], Any]) -> Any:
time.sleep(30)


def retry_ssh_run(operation: Callable[[], Any], attempts: int, attempt_delay: int) -> Any:
    """
    Executes the given SSH operation, retrying it when it fails with a connection error.

    * operation: callable that runs the SSH command; its return value is returned on success.
    * attempts: maximum number of times the operation is executed.
    * attempt_delay: number of seconds to sleep between attempts.

    A CommandError whose exit code is not 255 is raised immediately; any other failure is retried until the attempts
    are exhausted, at which point the last exception is re-raised to the caller.
    If attempts is less than 1 the operation is never executed and None is returned.
    """
    for attempt in range(1, attempts + 1):
        try:
            return operation()
        except Exception as e:
            # We raise CommandError on !=0 exit codes in the called method; only exit code 255 is retried
            # Instance of 'Exception' has no 'exit_code' member (no-member) - Disabled: e is actually a CommandError
            if isinstance(e, CommandError) and e.exit_code != 255:  # pylint: disable=no-member
                raise
            # Out of attempts: surface the failure instead of silently returning None
            if attempt == attempts:
                raise
            log.warning("The SSH operation failed, retrying in %s secs [Attempt %s/%s].\n%s", attempt_delay, attempt, attempts, e)
            time.sleep(attempt_delay)
    return None

def retry_if_false(operation: Callable[[], bool], attempts: int = 5, duration: int = 30) -> bool:
"""
Expand Down
26 changes: 16 additions & 10 deletions tests_e2e/tests/lib/ssh_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
from tests_e2e.tests.lib import shell
from tests_e2e.tests.lib.retry import retry_ssh_run

ATTEMPTS: int = 3
ATTEMPT_DELAY: int = 30


class SshClient(object):
def __init__(self, ip_address: str, username: str, private_key_file: Path, port: int = 22):
Expand All @@ -31,7 +34,7 @@ def __init__(self, ip_address: str, username: str, private_key_file: Path, port:
self._private_key_file: Path = private_key_file
self._port: int = port

def run_command(self, command: str, use_sudo: bool = False) -> str:
def run_command(self, command: str, use_sudo: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> str:
"""
Executes the given command over SSH and returns its stdout. If the command returns a non-zero exit code,
the function raises a CommandError.
Expand All @@ -44,9 +47,12 @@ def run_command(self, command: str, use_sudo: bool = False) -> str:
# Note that we add ~/bin to the remote PATH, since Python (Pypy) and other test tools are installed there.
# Note, too, that when using sudo we need to carry over the value of PATH to the sudo session
sudo = "sudo env PATH=$PATH PYTHONPATH=$PYTHONPATH" if use_sudo else ''
return retry_ssh_run(lambda: shell.run_command([
"ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file, destination,
f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}"]))
command = [
"ssh", "-o", "StrictHostKeyChecking=no", "-i", self._private_key_file,
destination,
f"if [[ -e ~/bin/set-agent-env ]]; then source ~/bin/set-agent-env; fi; {sudo} {command}"
]
return retry_ssh_run(lambda: shell.run_command(command), attempts, attempt_delay)

@staticmethod
def generate_ssh_key(private_key_file: Path):
Expand All @@ -59,19 +65,19 @@ def generate_ssh_key(private_key_file: Path):
def get_architecture(self):
return self.run_command("uname -m").rstrip()

def copy_to_node(self, local_path: Path, remote_path: Path, recursive: bool = False) -> None:
def copy_to_node(self, local_path: Path, remote_path: Path, recursive: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None:
"""
File copy to a remote node
"""
self._copy(local_path, remote_path, remote_source=False, remote_target=True, recursive=recursive)
self._copy(local_path, remote_path, remote_source=False, remote_target=True, recursive=recursive, attempts=attempts, attempt_delay=attempt_delay)

def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = False) -> None:
def copy_from_node(self, remote_path: Path, local_path: Path, recursive: bool = False, attempts: int = ATTEMPTS, attempt_delay: int = ATTEMPT_DELAY) -> None:
"""
File copy from a remote node
"""
self._copy(remote_path, local_path, remote_source=True, remote_target=False, recursive=recursive)
self._copy(remote_path, local_path, remote_source=True, remote_target=False, recursive=recursive, attempts=attempts, attempt_delay=attempt_delay)

def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: bool, recursive: bool) -> None:
def _copy(self, source: Path, target: Path, remote_source: bool, remote_target: bool, recursive: bool, attempts: int, attempt_delay: int) -> None:
if remote_source:
source = f"{self._username}@{self._ip_address}:{source}"
if remote_target:
Expand All @@ -82,4 +88,4 @@ def _copy(self, source: Path, target: Path, remote_source: bool, remote_target:
command.append("-r")
command.extend([str(source), str(target)])

shell.run_command(command)
return retry_ssh_run(lambda: shell.run_command(command), attempts, attempt_delay)
61 changes: 56 additions & 5 deletions tests_e2e/tests/lib/virtual_machine_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
# This module includes facilities to execute operations on virtual machines (list extensions, restart, etc).
#

import datetime
import json
import time
from typing import Any, Dict, List

from azure.identity import DefaultAzureCredential
Expand All @@ -32,6 +35,8 @@
from tests_e2e.tests.lib.identifiers import VmIdentifier
from tests_e2e.tests.lib.logging import log
from tests_e2e.tests.lib.retry import execute_with_retry
from tests_e2e.tests.lib.shell import CommandError
from tests_e2e.tests.lib.ssh_client import SshClient


class VirtualMachineClient(AzureClient):
Expand All @@ -54,11 +59,11 @@ def __init__(self, vm: VmIdentifier):
base_url=cloud.endpoints.resource_manager,
credential_scopes=[cloud.endpoints.resource_manager + "/.default"])

def get_description(self) -> VirtualMachine:
def get_model(self) -> VirtualMachine:
Copy link
Member Author

@narrieta narrieta Jun 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially I called this method "get_description" because I wasn't sure what is returning and the documentation just said that it returns the "description" of the VM. It actually returns the model.

"""
Retrieves the description of the virtual machine.
Retrieves the model of the virtual machine.
"""
log.info("Retrieving description for %s", self._identifier)
log.info("Retrieving VM model for %s", self._identifier)
return execute_with_retry(
lambda: self._compute_client.virtual_machines.get(
resource_group_name=self._identifier.resource_group,
Expand Down Expand Up @@ -103,17 +108,63 @@ def update(self, properties: Dict[str, Any], timeout: int = AzureClient._DEFAULT
operation_name=f"Update {self._identifier}",
timeout=timeout)

def restart(
    self,
    wait_for_boot: bool,
    ssh_client: SshClient = None,
    boot_timeout: datetime.timedelta = datetime.timedelta(minutes=5),
    timeout: int = AzureClient._DEFAULT_TIMEOUT) -> None:
    """
    Restarts (reboots) the virtual machine.

    NOTES:
        * If wait_for_boot is True, an SshClient must be provided in order to verify that the restart was successful.
        * 'timeout' is the timeout for the restart operation itself, while 'boot_timeout' is the timeout for waiting
          the boot to complete.

    Raises ValueError if wait_for_boot is True and no SshClient is given, and Exception if the PowerState cannot be
    found in the instance view or the VM does not complete its boot within 'boot_timeout'.
    """
    if wait_for_boot and ssh_client is None:
        raise ValueError("An SshClient must be provided if wait_for_boot is True")

    # Captured before issuing the restart; the VM's boot time must be later than this for the reboot to count
    before_restart = datetime.datetime.utcnow()

    self._execute_async_operation(
        lambda: self._compute_client.virtual_machines.begin_restart(
            resource_group_name=self._identifier.resource_group,
            vm_name=self._identifier.name),
        operation_name=f"Restart {self._identifier}",
        timeout=timeout)

    if not wait_for_boot:
        return

    start = datetime.datetime.utcnow()
    while datetime.datetime.utcnow() < start + boot_timeout:
        log.info("Waiting for VM %s to boot", self._identifier)
        time.sleep(15)  # Note that we always sleep at least 1 time, to give the reboot time to start
        instance_view = self.get_instance_view()
        power_state = [s.code for s in instance_view.statuses if "PowerState" in s.code]
        if len(power_state) != 1:
            # Report only the status codes: the status objects themselves are presumably SDK models that json.dumps cannot serialize
            status_codes = json.dumps([s.code for s in instance_view.statuses])
            raise Exception(f"Could not find PowerState in the instance view statuses:\n{status_codes}")
        log.info("VM's Power State: %s", power_state[0])
        if power_state[0] == "PowerState/running":
            # We may get an instance view captured before the reboot actually happened; verify
            # that the reboot actually happened by checking the system's uptime.
            log.info("Verifying VM's uptime to ensure the reboot has completed...")
            try:
                # attempts=1: do not retry inside run_command, this loop already polls until boot_timeout
                uptime = ssh_client.run_command("cat /proc/uptime | sed 's/ .*//'", attempts=1).rstrip()  # The uptime is the first field in the file
                log.info("Uptime: %s", uptime)
                boot_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=float(uptime))
                if boot_time > before_restart:
                    log.info("VM %s completed boot and is running. Boot time: %s", self._identifier, boot_time)
                    return
                log.info("The VM has not rebooted yet. Restart time: %s. Boot time: %s", before_restart, boot_time)
            except CommandError as e:
                # A refused connection is treated as "still booting"; any other SSH failure is a real error
                if e.exit_code == 255 and "Connection refused" in str(e):
                    log.info("VM %s is not yet accepting SSH connections", self._identifier)
                else:
                    raise
    raise Exception(f"VM {self._identifier} did not boot after {boot_timeout}")

def __str__(self):
return f"{self._identifier}"

Expand Down