From 79a21ceff25cbe3fda8e09e4ad0db6f750b1be0d Mon Sep 17 00:00:00 2001 From: StormLiangMS <89824293+StormLiangMS@users.noreply.github.com> Date: Tue, 28 Mar 2023 21:40:10 +0800 Subject: [PATCH] Revert frr route check (#2761) * Revert "[route_check] remove check-frr_patch mock (#2732)" This reverts commit f27dea0cfdefbdcfc03d19136e4ae47ea72fd51f. * Revert "[route_check] fix IPv6 address handling (#2722)" This reverts commit ff6883233a3c86e993add50453c3152745eaff0d. * Revert "[route_check] implement a check for FRR routes not marked offloaded (#2531)" This reverts commit 90d70152c76f40bf7c1f8e2c6aff6eb58b951a05. --- scripts/route_check.py | 122 +++---------------------------- tests/mock_tables/config_db.json | 3 +- tests/route_check_test.py | 17 +---- tests/route_check_test_data.py | 122 +------------------------------ 4 files changed, 16 insertions(+), 248 deletions(-) diff --git a/scripts/route_check.py b/scripts/route_check.py index c832b2c6ea..c6234bcc9d 100755 --- a/scripts/route_check.py +++ b/scripts/route_check.py @@ -11,11 +11,11 @@ How: NOTE: The flow from APPL-DB to ASIC-DB takes non zero milliseconds. 1) Initiate subscribe for ASIC-DB updates. - 2) Read APPL-DB & ASIC-DB + 2) Read APPL-DB & ASIC-DB 3) Get the diff. - 4) If any diff, + 4) If any diff, 4.1) Collect subscribe messages for a second - 4.2) check diff against the subscribe messages + 4.2) check diff against the subscribe messages 5) Rule out local interfaces & default routes 6) If still outstanding diffs, report failure. @@ -29,7 +29,7 @@ down to ensure failure. Analyze the reported failures to match expected. You may use the exit code to verify the result as success or not. - + """ @@ -45,9 +45,7 @@ import time import signal import traceback -import subprocess -from ipaddress import ip_network from swsscommon import swsscommon from utilities_common import chassis @@ -73,9 +71,6 @@ PRINT_MSG_LEN_MAX = 1000 -FRR_CHECK_RETRIES = 3 -FRR_WAIT_TIME = 15 - class Level(Enum): ERR = 'ERR' INFO = 'INFO' @@ -146,7 +141,7 @@ def add_prefix(ip): ip = ip + PREFIX_SEPARATOR + "32" else: ip = ip + PREFIX_SEPARATOR + "128" - return str(ip_network(ip)) + return ip def add_prefix_ifnot(ip): @@ -155,7 +150,7 @@ def add_prefix_ifnot(ip): :param ip: IP to add prefix as string. :return ip with prefix """ - return str(ip_network(ip)) if ip.find(PREFIX_SEPARATOR) != -1 else add_prefix(ip) + return ip if ip.find(PREFIX_SEPARATOR) != -1 else add_prefix(ip) def is_local(ip): @@ -298,7 +293,7 @@ def get_routes(): def get_route_entries(): """ - helper to read present route entries from ASIC-DB and + helper to read present route entries from ASIC-DB and as well initiate selector for ASIC-DB:ASIC-state updates. :return (selector, subscriber, ) """ @@ -314,7 +309,7 @@ def get_route_entries(): res, e = checkout_rt_entry(k) if res: rt.append(e) - + print_message(syslog.LOG_DEBUG, json.dumps({"ASIC_ROUTE_ENTRY": sorted(rt)}, indent=4)) selector = swsscommon.Select() @@ -322,31 +317,6 @@ def get_route_entries(): return (selector, subs, sorted(rt)) -def is_suppress_fib_pending_enabled(): - """ - Returns True if FIB suppression is enabled, False otherwise - """ - cfg_db = swsscommon.ConfigDBConnector() - cfg_db.connect() - - state = cfg_db.get_entry('DEVICE_METADATA', 'localhost').get('suppress-fib-pending') - - return state == 'enabled' - - -def get_frr_routes(): - """ - Read routes from zebra through CLI command - :return frr routes dictionary - """ - - output = subprocess.check_output('show ip route json', shell=True) - routes = json.loads(output) - output = subprocess.check_output('show ipv6 route json', shell=True) - routes.update(json.loads(output)) - return routes - - def get_interfaces(): """ helper to read interface table from APPL-DB. @@ -384,7 +354,7 @@ def filter_out_local_interfaces(keys): chassis_local_intfs = chassis.get_chassis_local_interfaces() local_if_lst.update(set(chassis_local_intfs)) - + db = swsscommon.DBConnector(APPL_DB_NAME, 0) tbl = swsscommon.Table(db, 'ROUTE_TABLE') @@ -523,61 +493,6 @@ def filter_out_standalone_tunnel_routes(routes): return updated_routes -def check_frr_pending_routes(): - """ - Check FRR routes for offload flag presence by executing "show ip route json" - Returns a list of routes that have no offload flag. - """ - - missed_rt = [] - - retries = FRR_CHECK_RETRIES - for i in range(retries): - missed_rt = [] - frr_routes = get_frr_routes() - - for _, entries in frr_routes.items(): - for entry in entries: - if entry['protocol'] != 'bgp': - continue - - # TODO: Also handle VRF routes. Currently this script does not check for VRF routes so it would be incorrect for us - # to assume they are installed in ASIC_DB, so we don't handle them. - if entry['vrfName'] != 'default': - continue - - if not entry.get('offloaded', False): - missed_rt.append(entry) - - if not missed_rt: - break - - time.sleep(FRR_WAIT_TIME) - - return missed_rt - - -def mitigate_installed_not_offloaded_frr_routes(missed_frr_rt, rt_appl): - """ - Mitigate installed but not offloaded FRR routes. - - In case route exists in APPL_DB, this function will manually send a notification to fpmsyncd - to trigger the flow that sends offload flag to zebra. - - It is designed to mitigate a problem when orchagent fails to send notification about installed route to fpmsyncd - or fpmsyncd not being able to read the notification or in case zebra fails to receive offload update due to variety of reasons. - All of the above mentioned cases must be considered as a bug, but even in that case we will report an error in the log but - given that this script ensures the route is installed in the hardware it will automitigate such a bug. - """ - db = swsscommon.DBConnector('APPL_STATE_DB', 0) - response_producer = swsscommon.NotificationProducer(db, f'{APPL_DB_NAME}_{swsscommon.APP_ROUTE_TABLE_NAME}_RESPONSE_CHANNEL') - for entry in [entry for entry in missed_frr_rt if entry['prefix'] in rt_appl]: - fvs = swsscommon.FieldValuePairs([('err_str', 'SWSS_RC_SUCCESS'), ('protocol', entry['protocol'])]) - response_producer.send('SWSS_RC_SUCCESS', entry['prefix'], fvs) - - print_message(syslog.LOG_ERR, f'Mitigated route {entry["prefix"]}') - - def get_soc_ips(config_db): mux_table = config_db.get_table('MUX_CABLE') soc_ips = [] @@ -621,7 +536,7 @@ def check_routes(): """ The heart of this script which runs the checks. Read APPL-DB & ASIC-DB, the relevant tables for route checking. - Checkout routes in ASIC-DB to match APPL-DB, discounting local & + Checkout routes in ASIC-DB to match APPL-DB, discounting local & default routes. In case of missed / unexpected entries in ASIC, it might be due to update latency between APPL & ASIC DBs. So collect ASIC-DB subscribe updates for a second, and checkout if you see SET @@ -630,16 +545,12 @@ def check_routes(): If there are still some unjustifiable diffs, between APPL & ASIC DB, related to routes report failure, else all good. - If there are FRR routes that aren't marked offloaded but all APPL & ASIC DB - routes are in sync report failure and perform a mitigation action. - :return (0, None) on sucess, else (-1, results) where results holds the unjustifiable entries. """ intf_appl_miss = [] rt_appl_miss = [] rt_asic_miss = [] - rt_frr_miss = [] results = {} adds = [] @@ -688,22 +599,11 @@ def check_routes(): if rt_asic_miss: results["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss - rt_frr_miss = check_frr_pending_routes() - - if rt_frr_miss: - results["missed_FRR_routes"] = rt_frr_miss - if results: print_message(syslog.LOG_WARNING, "Failure results: {", json.dumps(results, indent=4), "}") print_message(syslog.LOG_WARNING, "Failed. Look at reported mismatches above") print_message(syslog.LOG_WARNING, "add: ", json.dumps(adds, indent=4)) print_message(syslog.LOG_WARNING, "del: ", json.dumps(deletes, indent=4)) - - if rt_frr_miss and not rt_appl_miss and not rt_asic_miss: - print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR but all routes in APPL_DB and ASIC_DB are in sync") - if is_suppress_fib_pending_enabled(): - mitigate_installed_not_offloaded_frr_routes(rt_frr_miss, rt_appl) - return -1, results else: print_message(syslog.LOG_INFO, "All good!") @@ -749,7 +649,7 @@ def main(): return ret, res else: return ret, res - + if __name__ == "__main__": diff --git a/tests/mock_tables/config_db.json b/tests/mock_tables/config_db.json index 3a2b681a6e..22744365f1 100644 --- a/tests/mock_tables/config_db.json +++ b/tests/mock_tables/config_db.json @@ -842,8 +842,7 @@ "mac": "1d:34:db:16:a6:00", "platform": "x86_64-mlnx_msn3800-r0", "peer_switch": "sonic-switch", - "type": "ToRRouter", - "suppress-fib-pending": "enabled" + "type": "ToRRouter" }, "SNMP_COMMUNITY|msft": { "TYPE": "RO" diff --git a/tests/route_check_test.py b/tests/route_check_test.py index 118e9eab56..85e6a64a95 100644 --- a/tests/route_check_test.py +++ b/tests/route_check_test.py @@ -7,7 +7,7 @@ import time from sonic_py_common import device_info from unittest.mock import MagicMock, patch -from tests.route_check_test_data import APPL_DB, ARGS, ASIC_DB, CONFIG_DB, DEFAULT_CONFIG_DB, DESCR, OP_DEL, OP_SET, PRE, RESULT, RET, TEST_DATA, UPD, FRR_ROUTES +from tests.route_check_test_data import APPL_DB, ARGS, ASIC_DB, CONFIG_DB, DEFAULT_CONFIG_DB, DESCR, OP_DEL, OP_SET, PRE, RESULT, RET, TEST_DATA, UPD import pytest @@ -239,7 +239,6 @@ def setup(self): def init(self): route_check.UNIT_TESTING = 1 - route_check.FRR_WAIT_TIME = 0 @pytest.fixture def force_hang(self): @@ -259,8 +258,7 @@ def mock_dbs(self): patch("route_check.swsscommon.Table") as mock_table, \ patch("route_check.swsscommon.Select") as mock_sel, \ patch("route_check.swsscommon.SubscriberStateTable") as mock_subs, \ - patch("route_check.swsscommon.ConfigDBConnector", return_value=mock_config_db), \ - patch("route_check.swsscommon.NotificationProducer"): + patch("route_check.swsscommon.ConfigDBConnector", return_value=mock_config_db): device_info.get_platform = MagicMock(return_value='unittest') set_mock(mock_table, mock_conn, mock_sel, mock_subs, mock_config_db) yield @@ -274,16 +272,7 @@ def test_route_check(self, mock_dbs, test_num): set_test_case_data(ct_data) logger.info("Running test case {}: {}".format(test_num, ct_data[DESCR])) - with patch('sys.argv', ct_data[ARGS].split()), \ - patch('route_check.subprocess.check_output') as mock_check_output: - - routes = ct_data.get(FRR_ROUTES, {}) - - def side_effect(*args, **kwargs): - return json.dumps(routes) - - mock_check_output.side_effect = side_effect - + with patch('sys.argv', ct_data[ARGS].split()): ret, res = route_check.main() expect_ret = ct_data[RET] if RET in ct_data else 0 expect_res = ct_data[RESULT] if RESULT in ct_data else None diff --git a/tests/route_check_test_data.py b/tests/route_check_test_data.py index 7ed1eee41f..9e4cd3a009 100644 --- a/tests/route_check_test_data.py +++ b/tests/route_check_test_data.py @@ -6,7 +6,6 @@ CONFIG_DB = 4 PRE = "pre-value" UPD = "update" -FRR_ROUTES = "frr-routes" RESULT = "res" OP_SET = "SET" @@ -360,124 +359,5 @@ } } } - }, - "10": { - DESCR: "basic good one, check FRR routes", - ARGS: "route_check -m INFO -i 1000", - PRE: { - APPL_DB: { - ROUTE_TABLE: { - "0.0.0.0/0" : { "ifname": "portchannel0" }, - "10.10.196.12/31" : { "ifname": "portchannel0" }, - }, - INTF_TABLE: { - "PortChannel1013:10.10.196.24/31": {}, - "PortChannel1023:2603:10b0:503:df4::5d/126": {}, - "PortChannel1024": {} - } - }, - ASIC_DB: { - RT_ENTRY_TABLE: { - RT_ENTRY_KEY_PREFIX + "10.10.196.12/31" + RT_ENTRY_KEY_SUFFIX: {}, - RT_ENTRY_KEY_PREFIX + "10.10.196.24/32" + RT_ENTRY_KEY_SUFFIX: {}, - RT_ENTRY_KEY_PREFIX + "2603:10b0:503:df4::5d/128" + RT_ENTRY_KEY_SUFFIX: {}, - RT_ENTRY_KEY_PREFIX + "0.0.0.0/0" + RT_ENTRY_KEY_SUFFIX: {} - } - }, - }, - FRR_ROUTES: { - "0.0.0.0/0": [ - { - "prefix": "0.0.0.0/0", - "vrfName": "default", - "protocol": "bgp", - "offloaded": "true", - }, - ], - "10.10.196.12/31": [ - { - "prefix": "10.10.196.12/31", - "vrfName": "default", - "protocol": "bgp", - "offloaded": "true", - }, - ], - "10.10.196.24/31": [ - { - "protocol": "connected", - }, - ], - }, - }, - "11": { - DESCR: "failure test case, missing FRR routes", - ARGS: "route_check -m INFO -i 1000", - PRE: { - APPL_DB: { - ROUTE_TABLE: { - "0.0.0.0/0" : { "ifname": "portchannel0" }, - "10.10.196.12/31" : { "ifname": "portchannel0" }, - }, - INTF_TABLE: { - "PortChannel1013:10.10.196.24/31": {}, - "PortChannel1023:2603:10b0:503:df4::5d/126": {}, - "PortChannel1024": {} - } - }, - ASIC_DB: { - RT_ENTRY_TABLE: { - RT_ENTRY_KEY_PREFIX + "10.10.196.12/31" + RT_ENTRY_KEY_SUFFIX: {}, - RT_ENTRY_KEY_PREFIX + "10.10.196.24/32" + RT_ENTRY_KEY_SUFFIX: {}, - RT_ENTRY_KEY_PREFIX + "2603:10b0:503:df4::5d/128" + RT_ENTRY_KEY_SUFFIX: {}, - RT_ENTRY_KEY_PREFIX + "0.0.0.0/0" + RT_ENTRY_KEY_SUFFIX: {} - } - }, - }, - FRR_ROUTES: { - "0.0.0.0/0": [ - { - "prefix": "0.0.0.0/0", - "vrfName": "default", - "protocol": "bgp", - "offloaded": "true", - }, - ], - "10.10.196.12/31": [ - { - "prefix": "10.10.196.12/31", - "vrfName": "default", - "protocol": "bgp", - }, - ], - "10.10.196.24/31": [ - { - "protocol": "connected", - }, - ], - }, - RESULT: { - "missed_FRR_routes": [ - {"prefix": "10.10.196.12/31", "vrfName": "default", "protocol": "bgp"} - ], - }, - RET: -1, - }, - "10": { - DESCR: "basic good one with IPv6 address", - ARGS: "route_check -m INFO -i 1000", - PRE: { - APPL_DB: { - ROUTE_TABLE: { - }, - INTF_TABLE: { - "PortChannel1013:2000:31:0:0::1/64": {}, - } - }, - ASIC_DB: { - RT_ENTRY_TABLE: { - RT_ENTRY_KEY_PREFIX + "2000:31::1/128" + RT_ENTRY_KEY_SUFFIX: {}, - } - } - } - }, + } }