From 106d277c7ca25434f4a1c41416a650c58c17f46d Mon Sep 17 00:00:00 2001 From: Cong Hou Date: Wed, 10 May 2023 18:51:46 +0800 Subject: [PATCH 1/2] Update dscp remapping cases for Nvidia platforms This commit fixes two test cases in the DSCP remapping test. The fix is needed because the Nvidia implementation of DSCP remapping differs from the community implementation: Nvidia platforms cannot perform the remap directly on the tunnel. For test case test_tunnel_decap_dscp_to_pg_mapping: 1. Add a new mapping file tunnel_qos_map_nvidia.json and use it when testing Nvidia platforms. 2. Use cell size 144 for Nvidia platforms. 3. Use the outer DSCP instead of the inner DSCP. 4. Add some logs for easier debugging. For the xoff test case: 1. Add test data to the qos.yml file for the 4600C. 2. Disable packet aging in the buffer before the test. 3. Skip filling the leak on Nvidia platforms because it is not needed. 4. Use cell size 144 and packet length 300, because small packets cause descriptor exhaustion. Change-Id: If0090bd5bd664222cc7399598f7f46c10acebd1f --- tests/common/devices/sonic.py | 2 + tests/qos/files/qos.yml | 35 +++ tests/qos/files/tunnel_qos_map_nvidia.json | 263 +++++++++++++++++++++ tests/qos/test_tunnel_qos_remap.py | 9 +- tests/qos/tunnel_qos_remap_base.py | 51 +++- tests/saitests/py3/sai_qos_tests.py | 47 ++-- 6 files changed, 378 insertions(+), 29 deletions(-) create mode 100644 tests/qos/files/tunnel_qos_map_nvidia.json diff --git a/tests/common/devices/sonic.py b/tests/common/devices/sonic.py index e46fafe5d50..ee1a35bb5b2 100644 --- a/tests/common/devices/sonic.py +++ b/tests/common/devices/sonic.py @@ -1657,6 +1657,8 @@ def get_asic_name(self): asic = "th3" elif "Cisco Systems Inc Device a001" in output: asic = "gb" + elif "Mellanox Technologies" in output: + asic = "spc" return asic diff --git a/tests/qos/files/qos.yml b/tests/qos/files/qos.yml index eb0460eaac1..c9ce31f6021 100644 --- a/tests/qos/files/qos.yml +++ b/tests/qos/files/qos.yml @@ -17,6 +17,41 @@ # 
xoff_1 for 50G # xoff_2 for 100G qos_params: + # This block is only for dualtor pcbb xoff test on Nvidia platforms + spc3: + topo-dualtor: + 100000_40m: + pkts_num_leak_out: 0 + pcbb_xoff_1: + dscp: 3 + ecn: 1 + pg: 3 + pkts_num_trig_pfc: 176064 + pkts_num_trig_ingr_drp: 177916 + pkts_num_margin: 4 + pcbb_xoff_2: + dscp: 4 + ecn: 1 + pg: 4 + pkts_num_trig_pfc: 176064 + pkts_num_trig_ingr_drp: 177916 + pkts_num_margin: 4 + pcbb_xoff_3: + outer_dscp: 2 + dscp: 3 + ecn: 1 + pg: 2 + pkts_num_trig_pfc: 176064 + pkts_num_trig_ingr_drp: 177916 + pkts_num_margin: 4 + pcbb_xoff_4: + outer_dscp: 6 + dscp: 4 + ecn: 1 + pg: 6 + pkts_num_trig_pfc: 176064 + pkts_num_trig_ingr_drp: 177916 + pkts_num_margin: 4 mellanox: topo-any: profile: diff --git a/tests/qos/files/tunnel_qos_map_nvidia.json b/tests/qos/files/tunnel_qos_map_nvidia.json new file mode 100644 index 00000000000..360e9d8ac89 --- /dev/null +++ b/tests/qos/files/tunnel_qos_map_nvidia.json @@ -0,0 +1,263 @@ +{ + "DSCP_TO_TC_MAP": { + "AZURE": { + "0": "1", + "1": "1", + "10": "1", + "11": "1", + "12": "1", + "13": "1", + "14": "1", + "15": "1", + "16": "1", + "17": "1", + "18": "1", + "19": "1", + "2": "1", + "20": "1", + "21": "1", + "22": "1", + "23": "1", + "24": "1", + "25": "1", + "26": "1", + "27": "1", + "28": "1", + "29": "1", + "3": "3", + "30": "1", + "31": "1", + "32": "1", + "33": "8", + "34": "1", + "35": "1", + "36": "1", + "37": "1", + "38": "1", + "39": "1", + "4": "4", + "40": "1", + "41": "1", + "42": "1", + "43": "1", + "44": "1", + "45": "1", + "46": "5", + "47": "1", + "48": "7", + "49": "1", + "5": "1", + "50": "1", + "51": "1", + "52": "1", + "53": "1", + "54": "1", + "55": "1", + "56": "1", + "57": "1", + "58": "1", + "59": "1", + "6": "1", + "60": "1", + "61": "1", + "62": "1", + "63": "1", + "7": "1", + "8": "0", + "9": "1" + }, + "AZURE_TUNNEL": { + "0": "1", + "1": "1", + "10": "1", + "11": "1", + "12": "1", + "13": "1", + "14": "1", + "15": "1", + "16": "1", + "17": "1", + "18": "1", + "19": 
"1", + "2": "1", + "20": "1", + "21": "1", + "22": "1", + "23": "1", + "24": "1", + "25": "1", + "26": "1", + "27": "1", + "28": "1", + "29": "1", + "3": "3", + "30": "1", + "31": "1", + "32": "1", + "33": "8", + "34": "1", + "35": "1", + "36": "1", + "37": "1", + "38": "1", + "39": "1", + "4": "4", + "40": "1", + "41": "1", + "42": "1", + "43": "1", + "44": "1", + "45": "1", + "46": "5", + "47": "1", + "48": "7", + "49": "1", + "5": "1", + "50": "1", + "51": "1", + "52": "1", + "53": "1", + "54": "1", + "55": "1", + "56": "1", + "57": "1", + "58": "1", + "59": "1", + "6": "1", + "60": "1", + "61": "1", + "62": "1", + "63": "1", + "7": "1", + "8": "0", + "9": "1" + }, + "AZURE_UPLINK": { + "0": "1", + "1": "1", + "10": "1", + "11": "1", + "12": "1", + "13": "1", + "14": "1", + "15": "1", + "16": "1", + "17": "1", + "18": "1", + "19": "1", + "2": "2", + "20": "1", + "21": "1", + "22": "1", + "23": "1", + "24": "1", + "25": "1", + "26": "1", + "27": "1", + "28": "1", + "29": "1", + "3": "3", + "30": "1", + "31": "1", + "32": "1", + "33": "8", + "34": "1", + "35": "1", + "36": "1", + "37": "1", + "38": "1", + "39": "1", + "4": "4", + "40": "1", + "41": "1", + "42": "1", + "43": "1", + "44": "1", + "45": "1", + "46": "5", + "47": "1", + "48": "7", + "49": "1", + "5": "1", + "50": "1", + "51": "1", + "52": "1", + "53": "1", + "54": "1", + "55": "1", + "56": "1", + "57": "1", + "58": "1", + "59": "1", + "6": "6", + "60": "1", + "61": "1", + "62": "1", + "63": "1", + "7": "1", + "8": "0", + "9": "1" + } + }, + "TC_TO_PRIORITY_GROUP_MAP": { + "AZURE": { + "0": "0", + "1": "0", + "2": "2", + "3": "3", + "4": "4", + "5": "0", + "6": "6", + "7": "0", + "8": "0" + }, + "AZURE_TUNNEL": { + "0": "0", + "1": "0", + "2": "0", + "3": "2", + "4": "6", + "5": "0", + "6": "0", + "7": "0", + "8": "0" + } + }, + "TC_TO_DSCP_MAP": { + "AZURE_TUNNEL": { + "0": "8", + "1": "0", + "2": "2", + "3": "2", + "4": "6", + "5": "46", + "6": "6", + "7": "48", + "8": "33" + } + }, + 
"TC_TO_QUEUE_MAP": { + "AZURE": { + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "1" + }, + "AZURE_TUNNEL": { + "0": "0", + "1": "1", + "2": "2", + "3": "2", + "4": "6", + "5": "5", + "6": "6", + "7": "7", + "8": "1" + } + } +} diff --git a/tests/qos/test_tunnel_qos_remap.py b/tests/qos/test_tunnel_qos_remap.py index ffe4d89437d..4f60fc554fa 100644 --- a/tests/qos/test_tunnel_qos_remap.py +++ b/tests/qos/test_tunnel_qos_remap.py @@ -23,7 +23,8 @@ from .tunnel_qos_remap_base import build_testing_packet, check_queue_counter,\ dut_config, qos_config, load_tunnel_qos_map, run_ptf_test, toggle_mux_to_host,\ setup_module, update_docker_services, swap_syncd, counter_poll_config # noqa F401 -from .tunnel_qos_remap_base import leaf_fanout_peer_info, start_pfc_storm, stop_pfc_storm, get_queue_counter +from tunnel_qos_remap_base import leaf_fanout_peer_info, start_pfc_storm, \ + stop_pfc_storm, get_queue_counter, disable_packet_aging # noqa F401 from ptf import testutils from ptf.testutils import simple_tcp_packet from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts # noqa F401 @@ -474,10 +475,12 @@ def test_tunnel_decap_dscp_to_pg_mapping(rand_selected_dut, ptfhost, dut_config, # TODO: Get the cell size for other ASIC if asic == 'th2': cell_size = 208 + elif 'spc' in asic: + cell_size = 144 else: cell_size = 256 - tunnel_qos_map = load_tunnel_qos_map() + tunnel_qos_map = load_tunnel_qos_map(asic_name=asic) test_params = dict() test_params.update({ "src_port_id": dut_config["lag_port_ptf_id"], @@ -531,6 +534,8 @@ def test_xoff_for_pcbb(rand_selected_dut, ptfhost, dut_config, qos_config, xoff_ "platform_asic": dut_config["platform_asic"], "sonic_asic_type": dut_config["asic_type"], }) + if dut_config["asic_type"] == 'mellanox': + test_params.update({'cell_size': 144, 'packet_size': 300}) # Update qos config into test_params test_params.update(qos_config[xoff_profile]) # Run test on 
ptfhost diff --git a/tests/qos/tunnel_qos_remap_base.py b/tests/qos/tunnel_qos_remap_base.py index 0778702e3ec..f8b24d06a9f 100644 --- a/tests/qos/tunnel_qos_remap_base.py +++ b/tests/qos/tunnel_qos_remap_base.py @@ -101,22 +101,32 @@ def counter_poll_config(duthost, type, interval_ms): time.sleep(10) -def load_tunnel_qos_map(): +def load_tunnel_qos_map(asic_name=None): """ Read DSCP_TO_TC_MAP/TC_TO_PRIORITY_GROUP_MAP/TC_TO_DSCP_MAP/TC_TO_QUEUE_MAP from file return a dict """ - TUNNEL_QOS_MAP_FILENAME = r"qos/files/tunnel_qos_map.json" + is_nvidia_platform = asic_name is not None and 'spc' in asic_name + if not is_nvidia_platform: + TUNNEL_QOS_MAP_FILENAME = r"qos/files/tunnel_qos_map.json" + else: + TUNNEL_QOS_MAP_FILENAME = r"qos/files/tunnel_qos_map_nvidia.json" TUNNEL_MAP_NAME = "AZURE_TUNNEL" + UPLINK_MAP_NAME = "AZURE_UPLINK" MAP_NAME = "AZURE" ret = {} with open(TUNNEL_QOS_MAP_FILENAME, "r") as f: maps = json.load(f) # inner_dscp_to_pg map, a map for mapping dscp to priority group at decap side ret['inner_dscp_to_pg_map'] = {} - for k, v in list(maps['DSCP_TO_TC_MAP'][TUNNEL_MAP_NAME].items()): - ret['inner_dscp_to_pg_map'][int(k)] = int( - maps['TC_TO_PRIORITY_GROUP_MAP'][TUNNEL_MAP_NAME][v]) + if is_nvidia_platform: + for k, v in maps['DSCP_TO_TC_MAP'][UPLINK_MAP_NAME].items(): + ret['inner_dscp_to_pg_map'][int(k)] = int( + maps['TC_TO_PRIORITY_GROUP_MAP'][MAP_NAME][v]) + else: + for k, v in maps['DSCP_TO_TC_MAP'][TUNNEL_MAP_NAME].items(): + ret['inner_dscp_to_pg_map'][int(k)] = int( + maps['TC_TO_PRIORITY_GROUP_MAP'][TUNNEL_MAP_NAME][v]) # inner_dscp_to_outer_dscp_map, a map for rewriting DSCP in the encapsulated packets ret['inner_dscp_to_outer_dscp_map'] = {} for k, v in list(maps['DSCP_TO_TC_MAP'][MAP_NAME].items()): @@ -124,9 +134,14 @@ def load_tunnel_qos_map(): maps['TC_TO_DSCP_MAP'][TUNNEL_MAP_NAME][v]) # inner_dscp_to_queue_map, a map for mapping the tunnel traffic to egress queue at decap side ret['inner_dscp_to_queue_map'] = {} - for k, v in 
list(maps['DSCP_TO_TC_MAP'][TUNNEL_MAP_NAME].items()): - ret['inner_dscp_to_queue_map'][int(k)] = int( - maps['TC_TO_QUEUE_MAP'][MAP_NAME][v]) + if is_nvidia_platform: + for k, v in maps['DSCP_TO_TC_MAP'][UPLINK_MAP_NAME].items(): + ret['inner_dscp_to_queue_map'][int(k)] = int( + maps['TC_TO_QUEUE_MAP'][MAP_NAME][v]) + else: + for k, v in maps['DSCP_TO_TC_MAP'][TUNNEL_MAP_NAME].items(): + ret['inner_dscp_to_queue_map'][int(k)] = int( + maps['TC_TO_QUEUE_MAP'][MAP_NAME][v]) return ret @@ -240,6 +255,26 @@ def qos_config(rand_selected_dut, tbinfo, dut_config): return qos_configs['qos_params'][dut_asic][dut_topo][profile_name] +@pytest.fixture(scope='module', autouse=True) +def disable_packet_aging(rand_selected_dut, duthosts): + for duthost in duthosts: + asic = duthost.get_asic_name() + if 'spc' in asic: + logger.info("Disable Mellanox packet aging") + duthost.copy(src="qos/files/mellanox/packets_aging.py", dest="/tmp") + duthost.command("docker cp /tmp/packets_aging.py syncd:/") + duthost.command("docker exec syncd python /packets_aging.py disable") + + yield + + for duthost in duthosts: + asic = duthost.get_asic_name() + if 'spc' in asic: + logger.info("Enable Mellanox packet aging") + duthost.command("docker exec syncd python /packets_aging.py enable") + duthost.command("docker exec syncd rm -rf /packets_aging.py") + + def _create_ssh_tunnel_to_syncd_rpc(duthost): dut_asic = duthost.asic_instance() dut_asic.create_ssh_tunnel_sai_rpc() diff --git a/tests/saitests/py3/sai_qos_tests.py b/tests/saitests/py3/sai_qos_tests.py index 2a94521b6de..a51a0219cc1 100644 --- a/tests/saitests/py3/sai_qos_tests.py +++ b/tests/saitests/py3/sai_qos_tests.py @@ -789,40 +789,45 @@ def runTest(self): # There are packet leak even port tx is disabled (18 packets leak on TD3 found) # Hence we send some packet to fill the leak before testing - for dscp, _ in dscp_to_pg_map.items(): - pkt = self._build_testing_pkt( - active_tor_mac=active_tor_mac, - standby_tor_mac=standby_tor_mac, - 
active_tor_ip=active_tor_ip, - standby_tor_ip=standby_tor_ip, - inner_dscp=dscp, - outer_dscp=0, - dst_ip=dst_port_ip - ) - send_packet(self, src_port_id, pkt, 20) - time.sleep(10) - - for dscp, pg in dscp_to_pg_map.items(): + if asic_type != 'mellanox': + for dscp, _ in dscp_to_pg_map.items(): + pkt = self._build_testing_pkt( + active_tor_mac=active_tor_mac, + standby_tor_mac=standby_tor_mac, + active_tor_ip=active_tor_ip, + standby_tor_ip=standby_tor_ip, + inner_dscp=dscp, + outer_dscp=0, + dst_ip=dst_port_ip + ) + send_packet(self, src_port_id, pkt, 20) + time.sleep(10) + + for inner_dscp, pg in dscp_to_pg_map.items(): + logging.info("Iteration: inner_dscp:{}, pg: {}".format(inner_dscp, pg)) # Build and send packet to active tor. # The inner DSCP is set to testing value, # and the outer DSCP is set to 0 as it has no impact on remapping + # On Nvidia platforms, the dscp mode is pipe and the PG is determined by the outer dscp before decap + outer_dscp = inner_dscp if asic_type == 'mellanox' else 0 pkt = self._build_testing_pkt( active_tor_mac=active_tor_mac, standby_tor_mac=standby_tor_mac, active_tor_ip=active_tor_ip, standby_tor_ip=standby_tor_ip, - inner_dscp=dscp, - outer_dscp=0, + inner_dscp=inner_dscp, + outer_dscp=outer_dscp, dst_ip=dst_port_ip ) pg_shared_wm_res_base = sai_thrift_read_pg_shared_watermark( self.client, asic_type, port_list[src_port_id]) + logging.info(pg_shared_wm_res_base) send_packet(self, src_port_id, pkt, PKT_NUM) # validate pg counters increment by the correct pkt num time.sleep(8) pg_shared_wm_res = sai_thrift_read_pg_shared_watermark( self.client, asic_type, port_list[src_port_id]) - + logging.info(pg_shared_wm_res) assert (pg_shared_wm_res[pg] - pg_shared_wm_res_base[pg] <= (PKT_NUM + ERROR_TOLERANCE[pg]) * cell_size) assert (pg_shared_wm_res[pg] - pg_shared_wm_res_base[pg] @@ -4616,7 +4621,11 @@ def runTest(self): pkts_num_margin = int(self.test_params['pkts_num_margin']) else: pkts_num_margin = 2 - + if 'cell_size' in 
self.test_params: + cell_size = self.test_params['cell_size'] + cell_occupancy = (packet_size + cell_size - 1) // cell_size + else: + cell_occupancy = 1 try: # Disable tx on EGRESS port so that headroom buffer cannot be free self.sai_thrift_port_tx_disable( @@ -4648,7 +4657,7 @@ def runTest(self): packet_size=packet_size) # Send packets short of triggering pfc - send_packet(self, src_port_id, pkt, pkts_num_trig_pfc) + send_packet(self, src_port_id, pkt, pkts_num_trig_pfc // cell_occupancy - 1 - pkts_num_margin) time.sleep(8) # Read TX_OK again to calculate leaked packet number tx_counters, _ = sai_thrift_read_port_counters( From 43fdd992764c722fc3e079b7de751e8349f7fa7c Mon Sep 17 00:00:00 2001 From: Cong Hou Date: Mon, 12 Jun 2023 10:29:37 +0800 Subject: [PATCH 2/2] Add a comment for disable_packet_aging method On Nvidia (Mellanox) platforms, packets held in the buffer are aged out after a timeout. Packet aging must be disabled before running any buffer tests. --- tests/qos/tunnel_qos_remap_base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/qos/tunnel_qos_remap_base.py b/tests/qos/tunnel_qos_remap_base.py index f8b24d06a9f..9eb5138fbcf 100644 --- a/tests/qos/tunnel_qos_remap_base.py +++ b/tests/qos/tunnel_qos_remap_base.py @@ -257,6 +257,10 @@ def qos_config(rand_selected_dut, tbinfo, dut_config): @pytest.fixture(scope='module', autouse=True) def disable_packet_aging(rand_selected_dut, duthosts): + """ + For Nvidia(Mellanox) platforms, packets in buffer will be aged after a timeout. Need to disable this + before any buffer tests. + """ for duthost in duthosts: asic = duthost.get_asic_name() if 'spc' in asic: