diff --git a/tests/system_health/test_watchdog.py b/tests/system_health/test_watchdog.py
index 976bda6076..a96a98945d 100644
--- a/tests/system_health/test_watchdog.py
+++ b/tests/system_health/test_watchdog.py
@@ -16,14 +16,56 @@
 
 @pytest.fixture
 def pause_orchagent(duthost):
-    # find orchagent pid
-    pid = duthost.shell(
-        r"pgrep orchagent",
-        module_ignore_errors=True)['stdout']
-    logger.info('Get orchagent pid: {}'.format(pid))
-
-    # pause orchagent and clear syslog
-    duthost.shell(r"sudo kill -STOP {}".format(pid), module_ignore_errors=True)
+    """Pause orchagent with SIGSTOP and truncate syslog before yielding.
+
+    The pid lookup and the pause are attempted up to three times. When the
+    pid cannot be found, or orchagent is not reported as stopped after the
+    last attempt, pytest.fail() is called.
+    """
+    pid = None
+    retry = 3
+    while True:
+        retry -= 1
+        # find orchagent pid: https://www.man7.org/linux/man-pages/man1/pidof.1.html
+        pid_result = duthost.shell(
+            r"pidof orchagent",
+            module_ignore_errors=True)
+
+        # pidof exits with 0 only when at least one pid was found, so every
+        # other exit code (not just 1) means there is no usable pid in stdout.
+        rc = pid_result['rc']
+        if rc != 0:
+            logger.info('Get orchagent pid failed: {}'.format(pid_result))
+
+            if retry <= 0:
+                # abort the test because orchagent could not be paused
+                pytest.fail("Can't pause Orchagent by pid.")
+            else:
+                continue
+
+        pid = pid_result['stdout']
+        logger.info('Get orchagent pid: {}'.format(pid))
+
+        # pause orchagent
+        duthost.shell(r"sudo kill -STOP {}".format(pid), module_ignore_errors=True)
+
+        # validate orchagent paused, the STAT column should be Tl:
+        # root 124 0.3 1.6 596616 63600 pts/0 Tl 02:33 0:06 /usr/bin/orchagent
+        # NOTE(review): the pattern ends with two single quotes; confirm this
+        # matches the quoting check_process_status applies to its command.
+        result = check_process_status(duthost, "'Tl.*/usr/bin/orchagent''")
+        if result:
+            # continue the test once orchagent is paused
+            break
+        else:
+            # collect logs to investigate why orchagent was not paused
+            duthost.shell(r"sudo ps -auxww", module_ignore_errors=True)
+            duthost.shell(r"sudo cat /var/log/syslog | grep orchagent", module_ignore_errors=True)
+
+        if retry <= 0:
+            # abort the test because orchagent could not be paused
+            pytest.fail("Can't pause Orchagent by pid.")
+
     duthost.shell(r"sudo truncate -s 0 /var/log/syslog", module_ignore_errors=True)
 
     yield