diff --git a/scripts/coredump_gen_handler.py b/scripts/coredump_gen_handler.py index 895c22146a6b..eaa972bdca1e 100644 --- a/scripts/coredump_gen_handler.py +++ b/scripts/coredump_gen_handler.py @@ -8,6 +8,7 @@ import time import argparse import syslog +import re from swsscommon.swsscommon import SonicV2Connector from utilities_common.auto_techsupport_helper import * @@ -54,7 +55,6 @@ def __init__(self, core_name, container_name, db): self.db = db self.proc_mp = {} self.core_ts_map = {} - self.curr_ts_list = [] def handle_core_dump_creation_event(self): file_path = os.path.join(CORE_DUMP_DIR, self.core_name) @@ -93,7 +93,7 @@ def handle_core_dump_creation_event(self): since_cfg = self.get_since_arg() new_file = self.invoke_ts_cmd(since_cfg) if new_file: - self.write_to_state_db(int(time.time()), new_file[0]) + self.write_to_state_db(int(time.time()), new_file) def write_to_state_db(self, timestamp, ts_dump): name = strip_ts_ext(ts_dump) @@ -111,26 +111,33 @@ def get_since_arg(self): return since_cfg return SINCE_DEFAULT + def parse_ts_dump_name(self, ts_stdout): + """ Figure out the ts_dump name from the techsupport stdout """ + matches = re.findall(TS_PTRN, ts_stdout) + if matches: + return matches[-1] + syslog.syslog(syslog.LOG_ERR, "stdout of the 'show techsupport' cmd doesn't have the dump name") + return "" + def invoke_ts_cmd(self, since_cfg): since_cfg = "'" + since_cfg + "'" - cmd = " ".join(["show", "techsupport", "--since", since_cfg]) - rc, _, stderr = subprocess_exec(["show", "techsupport", "--since", since_cfg], env=ENV_VAR) + cmd_opts = ["show", "techsupport", "--silent", "--since", since_cfg] + cmd = " ".join(cmd_opts) + rc, stdout, stderr = subprocess_exec(cmd_opts, env=ENV_VAR) if not rc: syslog.syslog(syslog.LOG_ERR, "show techsupport failed with exit code {}, stderr:{}".format(rc, stderr)) - new_list = get_ts_dumps(True) - diff = list(set(new_list).difference(set(self.curr_ts_list))) - self.curr_ts_list = new_list - if not diff: + new_dump = self.parse_ts_dump_name(stdout) + if not new_dump: syslog.syslog(syslog.LOG_ERR, "{} was run, but no techsupport dump is found".format(cmd)) else: - syslog.syslog(syslog.LOG_INFO, "{} is successful, {} is created".format(cmd, diff)) - return diff + syslog.syslog(syslog.LOG_INFO, "{} is successful, {} is created".format(cmd, new_dump)) + return new_dump def verify_rate_limit_intervals(self, global_cooloff, container_cooloff): """Verify both the global and per-proc rate_limit_intervals have passed""" - self.curr_ts_list = get_ts_dumps(True) - if global_cooloff and self.curr_ts_list: - last_ts_dump_creation = os.path.getmtime(self.curr_ts_list[-1]) + curr_ts_list = get_ts_dumps(True) + if global_cooloff and curr_ts_list: + last_ts_dump_creation = os.path.getmtime(curr_ts_list[-1]) if time.time() - last_ts_dump_creation < global_cooloff: msg = "Global rate_limit_interval period has not passed. Techsupport Invocation is skipped. Core: {}" syslog.syslog(syslog.LOG_INFO, msg.format(self.core_name)) diff --git a/scripts/generate_dump b/scripts/generate_dump index d5f3bb67311e..85bc930a5a2a 100755 --- a/scripts/generate_dump +++ b/scripts/generate_dump @@ -1339,8 +1339,7 @@ main() { fi # Invoke the TechSupport Cleanup Hook - setsid $(echo > /tmp/techsupport_cleanup.log; - python3 /usr/local/bin/techsupport_cleanup.py ${TARFILE} &>> /tmp/techsupport_cleanup.log) & + setsid python3 /usr/local/bin/techsupport_cleanup.py ${TARFILE} &> /tmp/techsupport_cleanup.log & echo ${TARFILE} diff --git a/scripts/techsupport_cleanup.py b/scripts/techsupport_cleanup.py index 53a10562e7a9..ba5a3c1798b4 100644 --- a/scripts/techsupport_cleanup.py +++ b/scripts/techsupport_cleanup.py @@ -22,7 +22,7 @@ def handle_techsupport_creation_event(dump_name, db): file_path = os.path.join(TS_DIR, dump_name) if not verify_recent_file_creation(file_path): return - _ , num_bytes = get_stats(os.path.join(TS_DIR, TS_PTRN)) + _ , num_bytes = get_stats(os.path.join(TS_DIR, TS_PTRN_GLOB)) if db.get(CFG_DB, AUTO_TS, CFG_STATE) != "enabled": msg = "techsupport_cleanup is disabled. No cleanup is performed. current size occupied : {}" @@ -40,7 +40,7 @@ def handle_techsupport_creation_event(dump_name, db): syslog.syslog(syslog.LOG_NOTICE, msg.format(pretty_size(num_bytes))) return - removed_files = cleanup_process(max_ts, TS_PTRN, TS_DIR) + removed_files = cleanup_process(max_ts, TS_PTRN_GLOB, TS_DIR) clean_state_db_entries(removed_files, db) diff --git a/tests/coredump_gen_handler_test.py b/tests/coredump_gen_handler_test.py index c742f09d06ac..10a470b6b020 100644 --- a/tests/coredump_gen_handler_test.py +++ b/tests/coredump_gen_handler_test.py @@ -12,6 +12,11 @@ sys.path.append("scripts") import coredump_gen_handler as cdump_mod +AUTO_TS_STDOUT=""" +Techsupport is running with silent option. This command might take a long time. +The SAI dump is generated to /tmp/saisdkdump/sai_sdk_dump_11_22_2021_11_07_PM +/tmp/saisdkdump +""" def set_auto_ts_cfg(redis_mock, state="disabled", rate_limit_interval="0", @@ -74,12 +79,13 @@ def test_invoc_ts_state_db_update(self): populate_state_db(redis_mock) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/dump/sonic_dump_random2.tar.gz") @@ -105,12 +111,13 @@ def test_global_rate_limit_interval(self): populate_state_db(redis_mock) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/dump/sonic_dump_random2.tar.gz") @@ -138,12 +145,13 @@ def test_per_container_rate_limit_interval(self): "orchagent;{};swss".format(int(time.time()))}) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/core/orchagent.12345.123.core.gz") @@ -167,12 +175,13 @@ def test_invoc_ts_after_rate_limit_interval(self): "orchagent;{};swss".format(int(time.time()))}) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/dump/sonic_dump_random2.tar.gz") @@ -197,12 +206,13 @@ def test_core_dump_with_invalid_container_name(self): populate_state_db(redis_mock, {}) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/core/snmpd.12345.123.core.gz") @@ -225,12 +235,13 @@ def test_feature_table_not_set(self): populate_state_db(redis_mock, {}) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/core/python3.12345.123.core.gz") @@ -251,10 +262,11 @@ def test_since_argument(self): populate_state_db(redis_mock) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) - if "show techsupport --since '4 days ago'" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") - return 0, "", "" + if "--since '4 days ago'" in cmd_str: + patcher.fs.create_file(ts_dump) + return 0, AUTO_TS_STDOUT + ts_dump, "" elif "date --date='4 days ago'" in cmd_str: return 0, "", "" else: @@ -284,12 +296,13 @@ def test_masic_core_dump(self): populate_state_db(redis_mock) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") + patcher.fs.create_file(ts_dump) else: return 1, "", "Command Not Found" - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" cdump_mod.subprocess_exec = mock_cmd patcher.fs.create_file("/var/dump/sonic_dump_random1.tar.gz") patcher.fs.create_file("/var/dump/sonic_dump_random2.tar.gz") @@ -315,10 +328,12 @@ def test_invalid_since_argument(self): populate_state_db(redis_mock) with Patcher() as patcher: def mock_cmd(cmd, env): + ts_dump = "/var/dump/sonic_dump_random3.tar.gz" cmd_str = " ".join(cmd) - if "show techsupport --since '2 days ago'" in cmd_str: - patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") - return 0, "", "" + if "--since '2 days ago'" in cmd_str: + patcher.fs.create_file(ts_dump) + print(AUTO_TS_STDOUT + ts_dump) + return 0, AUTO_TS_STDOUT + ts_dump, "" elif "date --date='whatever'" in cmd_str: return 1, "", "Invalid Date Format" else: @@ -370,7 +385,7 @@ def mock_cmd(cmd, env): cmd_str = " ".join(cmd) if "show techsupport" in cmd_str: patcher.fs.create_file("/var/dump/sonic_dump_random3.tar.gz") - return 0, "", "" + return 0, AUTO_TS_STDOUT + ts_dump, "" patcher.fs.set_disk_usage(2000, path="/var/core/") patcher.fs.create_file("/var/core/orchagent.12345.123.core.gz", st_size=25) patcher.fs.create_file("/var/core/lldpmgrd.12345.22.core.gz", st_size=25) diff --git a/utilities_common/auto_techsupport_helper.py b/utilities_common/auto_techsupport_helper.py index b94b7828974b..0fba65652112 100644 --- a/utilities_common/auto_techsupport_helper.py +++ b/utilities_common/auto_techsupport_helper.py @@ -12,7 +12,7 @@ "CFG_DB", "AUTO_TS", "CFG_STATE", "CFG_MAX_TS", "COOLOFF", "CFG_CORE_USAGE", "CFG_SINCE", "FEATURE", "STATE_DB", "TS_MAP", "CORE_DUMP", "TIMESTAMP", "CONTAINER", - "TIME_BUF", "SINCE_DEFAULT" + "TIME_BUF", "SINCE_DEFAULT", "TS_PTRN_GLOB" ] + [ # Methods "verify_recent_file_creation", "get_ts_dumps", @@ -30,7 +30,9 @@ CORE_DUMP_PTRN = "*.core.gz" TS_DIR = "/var/dump" -TS_PTRN = "sonic_dump_*.tar*" +TS_ROOT = "sonic_dump_*" +TS_PTRN = "sonic_dump_.*tar.*" # Regex Exp +TS_PTRN_GLOB = "sonic_dump_*tar*" # Glob Exp # CONFIG DB Attributes CFG_DB = "CONFIG_DB" @@ -78,8 +80,10 @@ def strip_ts_ext(ts_path): def get_ts_dumps(full_path=False): - """ Get the list of TS dumps in the TS_DIR, sorted by the creation time """ - curr_list = glob.glob(os.path.join(TS_DIR, TS_PTRN)) + """ + Get the list of TS dumps in the TS_DIR, sorted by the creation time + """ + curr_list = glob.glob(os.path.join(TS_DIR, TS_ROOT)) curr_list.sort(key=os.path.getmtime) if full_path: return curr_list