Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

integrated reboot-cause history into reboot tests in platform_tests/test_reboot.py #4270

Merged
merged 2 commits into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 51 additions & 1 deletion tests/common/reboot.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import threading
import time
import re
import logging
from multiprocessing.pool import ThreadPool, TimeoutError
from errors import RunAnsibleModuleFail
from collections import deque

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -42,7 +44,7 @@
"wait": 120,
# We are searching two types of reboot cause.
# This change relates to changes of PR #6130 in sonic-buildimage repository
"cause": r"'reboot'|Non-Hardware \(reboot",
"cause": r"'reboot'|Non-Hardware \(reboot|^reboot",
"test_reboot_cause_only": False
},
REBOOT_TYPE_SOFT: {
Expand Down Expand Up @@ -75,6 +77,11 @@
}
}

# Upper bound on how many reboot types are remembered; used as the deque's maxlen below.
MAX_NUM_REBOOT_CAUSE_HISTORY = 10
# Bounded queue of the reboot types performed so far (test code appends one entry per reboot).
# Once full, appending drops the oldest entry.
# NOTE(review): the name is misspelled ("HISTOYR") but it is referenced from
# platform_tests/test_reboot.py, so it must not be renamed here alone.
REBOOT_TYPE_HISTOYR_QUEUE = deque([], MAX_NUM_REBOOT_CAUSE_HISTORY)
# Column keys expected in every parsed row of "show reboot-cause history".
REBOOT_CAUSE_HISTORY_TITLE = ["name", "cause", "time", "user", "comment"]


def get_warmboot_finalizer_state(duthost):
try:
res = duthost.command('systemctl is-active warmboot-finalizer.service',module_ignore_errors=True)
Expand Down Expand Up @@ -223,3 +230,46 @@ def check_reboot_cause(dut, reboot_cause_expected):
reboot_cause_got = get_reboot_cause(dut)
logging.debug("dut {} last reboot-cause {}".format(dut.hostname, reboot_cause_got))
return reboot_cause_got == reboot_cause_expected


def check_reboot_cause_history(dut, reboot_type_history_queue):
    """
    @summary: Check the reboot cause history on DUT. Can be used with wait_until
    @param dut: The AnsibleHost object of DUT.
    @param reboot_type_history_queue: reboot type queue, oldest reboot first
           (the most recent reboot type is the last element).
    @return: True when the history title matches REBOOT_CAUSE_HISTORY_TITLE and the
             newest len(reboot_type_history_queue) history entries match the queued
             reboot types in reverse chronological order; False otherwise.
    e.g.
    show reboot-cause history
    Name                 Cause          Time                             User    Comment
    -------------------  -------------  -------------------------------  ------  ---------
    2021_09_09_14_15_13  Power Loss ()  N/A                              N/A     N/A
    2021_09_09_14_06_17  reboot         Thu 09 Sep 2021 02:05:17 PM UTC  admin   N/A
    2021_09_09_13_59_11  Watchdog ()    N/A                              N/A     N/A
    2021_09_09_13_52_13  Power Loss ()  N/A                              N/A     N/A
    2021_09_09_13_45_18  warm-reboot    Thu 09 Sep 2021 01:44:14 PM UTC  admin   N/A
    2021_09_09_13_37_58  fast-reboot    Thu 09 Sep 2021 01:37:09 PM UTC  admin   N/A
    2021_09_09_13_30_52  soft-reboot    Thu 09 Sep 2021 01:30:24 PM UTC  admin   N/A
    2021_09_09_13_24_17  reboot         Thu 09 Sep 2021 01:23:17 PM UTC  admin   N/A
    """
    # "show reboot-cause history" is a new feature in 202012. Reboots are too slow to
    # justify a dedicated test, so this check piggybacks on the existing reboot tests;
    # callers are expected to skip it on releases before 202012.
    reboot_cause_history_got = dut.show_and_parse("show reboot-cause history")
    logging.debug("dut {} reboot-cause history {}. reboot type history queue is {}".format(
        dut.hostname, reboot_cause_history_got, reboot_type_history_queue))

    logging.info("Verify reboot-cause history title")
    if reboot_cause_history_got:
        actual_title = reboot_cause_history_got[0].keys()
        if set(REBOOT_CAUSE_HISTORY_TITLE) != set(actual_title):
            logging.error("Expected reboot-cause history title:{} not match actual reboot-cause history title:{}".format(
                REBOOT_CAUSE_HISTORY_TITLE, actual_title))
            return False

    logging.info("Verify reboot-cause output are sorted in reverse chronological order")
    expected_len = len(reboot_type_history_queue)
    actual_len = len(reboot_cause_history_got)
    if expected_len > actual_len:
        # Report the real number of history rows (previously the queue length was logged twice).
        logging.error("The number of expected reboot-cause:{} is more than that of actual reboot-cause:{}".format(
            expected_len, actual_len))
        return False

    for index, reboot_type in enumerate(reboot_type_history_queue):
        expected_cause = reboot_ctrl_dict[reboot_type]["cause"]
        # The queue is oldest-first while the history output is newest-first, so queue
        # element `index` corresponds to history row `expected_len - index - 1`.
        actual_cause = reboot_cause_history_got[expected_len - index - 1]["cause"]
        if not re.search(expected_cause, actual_cause):
            # Log the very row that was compared; the old message indexed one row past it
            # and could raise IndexError instead of returning False.
            logging.error("The {} reboot-cause not match. expected_reboot type={}, actual_reboot_cause={}".format(
                index, expected_cause, actual_cause))
            return False
    return True
7 changes: 7 additions & 0 deletions tests/platform_tests/test_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def reboot_and_check(localhost, dut, interfaces, xcvr_skip_list, reboot_type=REB
logging.info("Run %s reboot on DUT" % reboot_type)

reboot(dut, localhost, reboot_type=reboot_type, reboot_helper=reboot_helper, reboot_kwargs=reboot_kwargs)
REBOOT_TYPE_HISTOYR_QUEUE.append(reboot_type)

check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type)

Expand All @@ -77,6 +78,12 @@ def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type =
assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_type), \
"got reboot-cause failed after rebooted by %s" % reboot_type

if "201811" in dut.os_version or "201911" in dut.os_version:
logging.info("Skip check reboot-cause history for version before 202012")
else:
logger.info("Check reboot-cause history")
assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause_history, dut,
REBOOT_TYPE_HISTOYR_QUEUE), "Check reboot-cause history failed after rebooted by %s" % reboot_type
if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type)
return
Expand Down