diff --git a/tests/common/plugins/sanity_check/constants.py b/tests/common/plugins/sanity_check/constants.py index a497caee41..6d2b49181a 100644 --- a/tests/common/plugins/sanity_check/constants.py +++ b/tests/common/plugins/sanity_check/constants.py @@ -22,43 +22,43 @@ # Recover related definitions RECOVER_METHODS = { "config_reload": { - "cmd": "bash -c 'config reload -y &>/dev/null'", - "reboot": False, - "adaptive": False, - 'recover_wait': 120 - }, - "config_reload_f": { - "cmd": "bash -c 'config reload -f -y &>/dev/null'", + "cmd": "false", + "reload": True, "reboot": False, "adaptive": False, 'recover_wait': 120 }, "load_minigraph": { "cmd": "bash -c 'config load_minigraph -y &>/dev/null'", + "reload": False, "reboot": False, "adaptive": False, 'recover_wait': 60 }, "reboot": { "cmd": "reboot", + "reload": False, "reboot": True, "adaptive": False, 'recover_wait': 120 }, "warm_reboot": { "cmd": "warm-reboot", + "reload": False, "reboot": True, "adaptive": False, 'recover_wait': 120 }, "fast_reboot": { "cmd": "fast_reboot", + "reload": False, "reboot": True, "adaptive": False, 'recover_wait': 120 }, "adaptive": { "cmd": None, + "reload": False, "reboot": False, "adaptive": True, 'recover_wait': 30 diff --git a/tests/common/plugins/sanity_check/recover.py b/tests/common/plugins/sanity_check/recover.py index 2a8b22c64b..26eb5e5bb9 100644 --- a/tests/common/plugins/sanity_check/recover.py +++ b/tests/common/plugins/sanity_check/recover.py @@ -1,16 +1,14 @@ import json import logging -from . import constants - -from tests.common.utilities import wait -from tests.common.platform.device_utils import fanout_switch_port_lookup -from tests.common.config_reload import config_force_option_supported -from tests.common.reboot import reboot -from tests.common.reboot import REBOOT_TYPE_WARM, REBOOT_TYPE_FAST, REBOOT_TYPE_COLD -from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp from tests.common import config_reload from tests.common.devices.sonic import SonicHost +from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp +from tests.common.platform.device_utils import fanout_switch_port_lookup +from tests.common.reboot import REBOOT_TYPE_WARM, REBOOT_TYPE_FAST, REBOOT_TYPE_COLD +from tests.common.reboot import reboot +from tests.common.utilities import wait +from . import constants logger = logging.getLogger(__name__) @@ -25,7 +23,7 @@ def reboot_dut(dut, localhost, cmd): else: reboot_type = REBOOT_TYPE_COLD - reboot(dut, localhost, reboot_type=reboot_type) + reboot(dut, localhost, reboot_type=reboot_type, safe_reboot=True, check_intf_up_ports=True) def _recover_interfaces(dut, fanouthosts, result, wait_time): @@ -160,11 +158,11 @@ def adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_result .format(result, action, outstanding_action)) if outstanding_action: - if outstanding_action == "config_reload" and config_force_option_supported(dut): - outstanding_action = "config_reload_f" method = constants.RECOVER_METHODS[outstanding_action] wait_time = method['recover_wait'] - if method["reboot"]: + if method["reload"]: + config_reload(dut, safe_reload=True, check_intf_up_ports=True) + elif method["reboot"]: reboot_dut(dut, localhost, method["cmd"]) else: _recover_with_command(dut, method['cmd'], wait_time) @@ -172,12 +170,13 @@ def adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_result def recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, recover_method): logger.warning("Try to recover %s using method %s" % (dut.hostname, recover_method)) - if recover_method == "config_reload" and config_force_option_supported(dut): - recover_method = "config_reload_f" + method = constants.RECOVER_METHODS[recover_method] wait_time = method['recover_wait'] if method["adaptive"]: adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, wait_time) + elif method["reload"]: + config_reload(dut, safe_reload=True, check_intf_up_ports=True) elif method["reboot"]: reboot_dut(dut, localhost, method["cmd"]) else: diff --git a/tests/common/reboot.py b/tests/common/reboot.py index 854840298f..38513b8e5b 100644 --- a/tests/common/reboot.py +++ b/tests/common/reboot.py @@ -6,6 +6,10 @@ import os from multiprocessing.pool import ThreadPool from collections import deque + +from .helpers.assertions import pytest_assert +from .platform.interface_utils import check_interface_status_of_up_ports +from .platform.processes_utils import wait_critical_processes from .utilities import wait_until, get_plt_reboot_ctrl from tests.common.helpers.dut_utils import ignore_t2_syslog_msgs @@ -195,7 +199,8 @@ def execute_reboot_helper(): def reboot(duthost, localhost, reboot_type='cold', delay=10, timeout=0, wait=0, wait_for_ssh=True, wait_warmboot_finalizer=False, warmboot_finalizer_timeout=0, - reboot_helper=None, reboot_kwargs=None, plt_reboot_ctrl_overwrite=True): + reboot_helper=None, reboot_kwargs=None, plt_reboot_ctrl_overwrite=True, + safe_reboot=False, check_intf_up_ports=False): """ reboots DUT :param duthost: DUT host object @@ -208,6 +213,8 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, :param wait_warmboot_finalizer=True: Wait for WARMBOOT_FINALIZER done :param reboot_helper: helper function to execute the power toggling :param reboot_kwargs: arguments to pass to the reboot_helper + :param safe_reboot: arguments to wait DUT ready after reboot + :param check_intf_up_ports: arguments to check interface after reboot :return: """ pool = ThreadPool() @@ -240,7 +247,20 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, logger.info('waiting for switch {} to initialize'.format(hostname)) - time.sleep(wait) + if safe_reboot: + # The wait time passed in might not be guaranteed to cover the actual + # time it takes for containers to come back up. Therefore, add 5 + # minutes to the maximum wait time. If it's ready sooner, then the + # function will return sooner. + pytest_assert(wait_until(wait + 300, 20, 0, duthost.critical_services_fully_started), + "All critical services should be fully started!") + wait_critical_processes(duthost) + + if check_intf_up_ports: + pytest_assert(wait_until(300, 20, 0, check_interface_status_of_up_ports, duthost), + "Not all ports that are admin up on are operationally up") + else: + time.sleep(wait) # Wait warmboot-finalizer service if reboot_type == REBOOT_TYPE_WARM and wait_warmboot_finalizer: