Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix] Fix multi-asic flaky lldp not ready issue #9453

Merged
merged 2 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions tests/common/plugins/sanity_check/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,43 +22,43 @@
# Recover related definitions
RECOVER_METHODS = {
"config_reload": {
"cmd": "bash -c 'config reload -y &>/dev/null'",
"reboot": False,
"adaptive": False,
'recover_wait': 120
},
"config_reload_f": {
wangxin marked this conversation as resolved.
Show resolved Hide resolved
"cmd": "bash -c 'config reload -f -y &>/dev/null'",
"cmd": "false",
"reload": True,
"reboot": False,
"adaptive": False,
'recover_wait': 120
},
"load_minigraph": {
"cmd": "bash -c 'config load_minigraph -y &>/dev/null'",
"reload": False,
"reboot": False,
"adaptive": False,
'recover_wait': 60
},
"reboot": {
"cmd": "reboot",
"reload": False,
"reboot": True,
"adaptive": False,
'recover_wait': 120
},
"warm_reboot": {
"cmd": "warm-reboot",
"reload": False,
"reboot": True,
"adaptive": False,
'recover_wait': 120
},
"fast_reboot": {
"cmd": "fast_reboot",
"reload": False,
"reboot": True,
"adaptive": False,
'recover_wait': 120
},
"adaptive": {
"cmd": None,
"reload": False,
"reboot": False,
"adaptive": True,
'recover_wait': 30
Expand Down
27 changes: 13 additions & 14 deletions tests/common/plugins/sanity_check/recover.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
import json
import logging

from . import constants

from tests.common.utilities import wait
from tests.common.platform.device_utils import fanout_switch_port_lookup
from tests.common.config_reload import config_force_option_supported
from tests.common.reboot import reboot
from tests.common.reboot import REBOOT_TYPE_WARM, REBOOT_TYPE_FAST, REBOOT_TYPE_COLD
from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp
from tests.common import config_reload
from tests.common.devices.sonic import SonicHost
from tests.common.helpers.parallel import parallel_run, reset_ansible_local_tmp
from tests.common.platform.device_utils import fanout_switch_port_lookup
from tests.common.reboot import REBOOT_TYPE_WARM, REBOOT_TYPE_FAST, REBOOT_TYPE_COLD
from tests.common.reboot import reboot
from tests.common.utilities import wait
from . import constants

logger = logging.getLogger(__name__)

Expand All @@ -25,7 +23,7 @@ def reboot_dut(dut, localhost, cmd):
else:
reboot_type = REBOOT_TYPE_COLD

reboot(dut, localhost, reboot_type=reboot_type)
reboot(dut, localhost, reboot_type=reboot_type, safe_reboot=True, check_intf_up_ports=True)


def _recover_interfaces(dut, fanouthosts, result, wait_time):
Expand Down Expand Up @@ -160,24 +158,25 @@ def adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_result
.format(result, action, outstanding_action))

if outstanding_action:
if outstanding_action == "config_reload" and config_force_option_supported(dut):
outstanding_action = "config_reload_f"
method = constants.RECOVER_METHODS[outstanding_action]
wait_time = method['recover_wait']
if method["reboot"]:
if method["reload"]:
config_reload(dut, safe_reload=True, check_intf_up_ports=True)
elif method["reboot"]:
reboot_dut(dut, localhost, method["cmd"])
else:
_recover_with_command(dut, method['cmd'], wait_time)


def recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, recover_method):
logger.warning("Try to recover %s using method %s" % (dut.hostname, recover_method))
if recover_method == "config_reload" and config_force_option_supported(dut):
recover_method = "config_reload_f"

method = constants.RECOVER_METHODS[recover_method]
wait_time = method['recover_wait']
if method["adaptive"]:
adaptive_recover(dut, localhost, fanouthosts, nbrhosts, tbinfo, check_results, wait_time)
elif method["reload"]:
config_reload(dut, safe_reload=True, check_intf_up_ports=True)
elif method["reboot"]:
reboot_dut(dut, localhost, method["cmd"])
else:
Expand Down
24 changes: 22 additions & 2 deletions tests/common/reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
import os
from multiprocessing.pool import ThreadPool
from collections import deque

from .helpers.assertions import pytest_assert
from .platform.interface_utils import check_interface_status_of_up_ports
from .platform.processes_utils import wait_critical_processes
from .utilities import wait_until, get_plt_reboot_ctrl
from tests.common.helpers.dut_utils import ignore_t2_syslog_msgs

Expand Down Expand Up @@ -188,7 +192,8 @@ def execute_reboot_helper():

def reboot(duthost, localhost, reboot_type='cold', delay=10,
timeout=0, wait=0, wait_for_ssh=True, wait_warmboot_finalizer=False, warmboot_finalizer_timeout=0,
reboot_helper=None, reboot_kwargs=None, plt_reboot_ctrl_overwrite=True):
reboot_helper=None, reboot_kwargs=None, plt_reboot_ctrl_overwrite=True,
safe_reboot=False, check_intf_up_ports=False):
"""
reboots DUT
:param duthost: DUT host object
Expand All @@ -201,6 +206,8 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
:param wait_warmboot_finalizer=True: Wait for WARMBOOT_FINALIZER done
:param reboot_helper: helper function to execute the power toggling
:param reboot_kwargs: arguments to pass to the reboot_helper
:param safe_reboot: arguments to wait DUT ready after reboot
:param check_intf_up_ports: arguments to check interface after reboot
:return:
"""
pool = ThreadPool()
Expand Down Expand Up @@ -233,7 +240,20 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,

logger.info('waiting for switch {} to initialize'.format(hostname))

time.sleep(wait)
if safe_reboot:
# The wait time passed in might not be guaranteed to cover the actual
# time it takes for containers to come back up. Therefore, add 5
# minutes to the maximum wait time. If it's ready sooner, then the
# function will return sooner.
pytest_assert(wait_until(wait + 300, 20, 0, duthost.critical_services_fully_started),
"All critical services should be fully started!")
wait_critical_processes(duthost)

if check_intf_up_ports:
pytest_assert(wait_until(300, 20, 0, check_interface_status_of_up_ports, duthost),
"Not all ports that are admin up on are operationally up")
else:
time.sleep(wait)

# Wait warmboot-finalizer service
if reboot_type == REBOOT_TYPE_WARM and wait_warmboot_finalizer:
Expand Down