From 27da28c1bd2c5d83f60d734c5ffe3cd473cf7352 Mon Sep 17 00:00:00 2001 From: AharonMalkin <94370721+AharonMalkin@users.noreply.github.com> Date: Tue, 10 Oct 2023 19:39:10 +0300 Subject: [PATCH 01/10] Fixed log handling issues on fast/warm upgrade test (#10162) Fix reboot type source fetch logic: Previously, the reboot type source was based on the test name. However, it doesn't work with the upgrade path test, as it does not include the reboot type in the name of the test. This fix treats the mentioned case by getting the name from the upgrade type parameter. --- tests/platform_tests/conftest.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/platform_tests/conftest.py b/tests/platform_tests/conftest.py index 2e047a9daf..40ab0858e3 100644 --- a/tests/platform_tests/conftest.py +++ b/tests/platform_tests/conftest.py @@ -450,9 +450,13 @@ def advanceboot_loganalyzer(duthosts, enum_rand_one_per_hwsku_frontend_hostname, """ duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname] test_name = request.node.name - if "warm" in test_name: + if "upgrade_path" in test_name: + reboot_type_source = request.config.getoption("--upgrade_type") + else: + reboot_type_source = test_name + if "warm" in reboot_type_source: reboot_type = "warm" - elif "fast" in test_name: + elif "fast" in reboot_type_source: reboot_type = "fast" else: reboot_type = "unknown" From 7c57da1e7cb5ba2502c71ef5964d5def3356f4db Mon Sep 17 00:00:00 2001 From: "Nana@Nvidia" <78413612+nhe-NV@users.noreply.github.com> Date: Wed, 11 Oct 2023 00:42:23 +0800 Subject: [PATCH 02/10] [Mellanox] add mock for temperature threshold in psu power threshold (#9902) Enhance test case test_psu_power_threshold - add mock for temperature threshold at the beginning of the test. The temperature on some setup may exceed the threshold at the beginning of the test start, and will cause test failure. 
The temperature on some setup may exceed the threshold at the beginning of the test start, and will cause test failure. Enhance test case test_psu_power_threshold with adding mock for temperature threshold at the beginning of the test --- .../mellanox_thermal_control_test_helper.py | 6 ++ .../mellanox/test_psu_power_threshold.py | 62 +++++++++++-------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py b/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py index caf97014fa..a48eac11de 100644 --- a/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py +++ b/tests/platform_tests/mellanox/mellanox_thermal_control_test_helper.py @@ -1355,6 +1355,12 @@ def mock_fan_ambient_thermal(self, temperature): def mock_port_ambient_thermal(self, temperature): self.mock_helper.mock_value(self.PORT_AMBIENT_TEMP, int(temperature)) + def mock_ambient_temp_critical_threshold(self, temperature): + self.mock_helper.mock_value(self.AMBIENT_TEMP_CRITICAL_THRESHOLD, int(temperature)) + + def mock_ambient_temp_warning_threshold(self, temperature): + self.mock_helper.mock_value(self.AMBIENT_TEMP_WARNING_THRESHOLD, int(temperature)) + def read_psu_power_threshold(self, psu): return int(self.mock_helper.read_value(self.PSU_POWER_CAPACITY.format(psu))) diff --git a/tests/platform_tests/mellanox/test_psu_power_threshold.py b/tests/platform_tests/mellanox/test_psu_power_threshold.py index 6ade7c7b39..9a1c220a32 100644 --- a/tests/platform_tests/mellanox/test_psu_power_threshold.py +++ b/tests/platform_tests/mellanox/test_psu_power_threshold.py @@ -21,22 +21,15 @@ MAX_PSUS = None -@pytest.fixture -# We can not set it as module because mocker_factory is function scope -def mock_power_threshold(request, duthosts, rand_one_dut_hostname, mocker_factory): # noqa F811 - global mocker +@pytest.fixture(autouse=True) +def check_feature_supported(request, duthosts, rand_one_dut_hostname, 
mocker_factory): # noqa F811 global MAX_PSUS - - psudaemon_restarted = False - + global mocker duthost = duthosts[rand_one_dut_hostname] platform_data = get_platform_data(duthost) MAX_PSUS = platform_data['psus']['number'] - - mocker = mocker_factory(duthost, 'PsuPowerThresholdMocker') - all_psus_supporting_thresholds = True - + mocker = mocker_factory(duthost, 'PsuPowerThresholdMocker') try: for psu_index in range(MAX_PSUS): mocker.read_psu_power_threshold(psu_index + 1) @@ -44,23 +37,30 @@ def mock_power_threshold(request, duthosts, rand_one_dut_hostname, mocker_factor except Exception: all_psus_supporting_thresholds = False - if all_psus_supporting_thresholds: - try: - ambient_critical_threshold = None - ambient_warning_threshold = None - ambient_critical_threshold = mocker.read_ambient_temp_critical_threshold() - ambient_warning_threshold = mocker.read_ambient_temp_warning_threshold() - except Exception: - pytest.fail('Some required information does not exist (ambient thresholds critical {} warning {})'.format( - ambient_critical_threshold, - ambient_warning_threshold)) + MockPlatform = request.config.getoption("--mock_any_testbed") + + if MockPlatform and all_psus_supporting_thresholds: + pytest.fail('CLI option "--mock_any_testbed" is provided while power thresholds are supported on both PSUs') + + if not (all_psus_supporting_thresholds or MockPlatform): + pytest.skip('PSU power threshold is not supported') + + +@pytest.fixture +def mock_ambient_temp_threshold(): + ambient_temp_critical_threshold = 60000 + ambient_temp_warning_threshold = 50000 + mocker.mock_ambient_temp_critical_threshold(ambient_temp_critical_threshold) + mocker.mock_ambient_temp_warning_threshold(ambient_temp_warning_threshold) + + +@pytest.fixture +def mock_power_threshold(request, duthosts, rand_one_dut_hostname, mock_ambient_temp_threshold): # noqa F811 + psudaemon_restarted = False + duthost = duthosts[rand_one_dut_hostname] MockPlatform = request.config.getoption("--mock_any_testbed") 
if MockPlatform: - if all_psus_supporting_thresholds: - logger.info( - 'CLI option "--mock_any_testbed" is provided while power thresholds are supported on both PSUs') - logger.info('Mocking the system to support PSU power threshold') mocker.mock_power_threshold(MAX_PSUS) @@ -69,8 +69,16 @@ def mock_power_threshold(request, duthosts, rand_one_dut_hostname, mocker_factor duthost.shell('docker exec pmon supervisorctl restart psud') psudaemon_restarted = True time.sleep(2) - elif not all_psus_supporting_thresholds: - pytest.skip('PSU power threshold is not supported') + else: + try: + ambient_critical_threshold = None + ambient_warning_threshold = None + ambient_critical_threshold = mocker.read_ambient_temp_critical_threshold() + ambient_warning_threshold = mocker.read_ambient_temp_warning_threshold() + except Exception: + pytest.fail('Some required information does not exist (ambient thresholds critical {} warning {})'.format( + ambient_critical_threshold, + ambient_warning_threshold)) yield From acc686e8c12b73f7c00cfca7559c80f802e65d03 Mon Sep 17 00:00:00 2001 From: "Nana@Nvidia" <78413612+nhe-NV@users.noreply.github.com> Date: Wed, 11 Oct 2023 00:44:43 +0800 Subject: [PATCH 03/10] fix auto-negotiation unstable issue (#9813) For Nvidia device, only support to raise link Force<->Force or Auto-negotiation<->Auto-negotiation flows For other scenario the port may not come up in timeout, but could come up in longer time, for example may need 120s. 
--- tests/platform_tests/test_auto_negotiation.py | 121 ++++++++++-------- 1 file changed, 67 insertions(+), 54 deletions(-) diff --git a/tests/platform_tests/test_auto_negotiation.py b/tests/platform_tests/test_auto_negotiation.py index f17e3224fa..52bbb689a1 100644 --- a/tests/platform_tests/test_auto_negotiation.py +++ b/tests/platform_tests/test_auto_negotiation.py @@ -8,6 +8,7 @@ """ import logging import pytest +import contextlib from tests.common.config_reload import config_reload from tests.common.helpers.assertions import pytest_assert, pytest_require @@ -125,6 +126,16 @@ def recover_ports(duthosts, fanouthosts): config_reload(duthost) +@contextlib.contextmanager +def shutdown_port_on_duthost(duthost, iface): + """Shutdown interface on a device. and start up the port when the context finish """ + if is_mellanox_device(duthost): + duthost.shell("config interface shutdown {}".format(iface)) + yield + if is_mellanox_device(duthost): + duthost.shell("config interface startup {}".format(iface)) + + def check_ports_up(duthost, dut_ports, expect_speed=None): """Check if given ports are operational up or not @@ -172,24 +183,25 @@ def test_auto_negotiation_advertised_speeds_all(enum_dut_portname_module_fixture skip_if_no_multi_speed_adv_support(fanout, fanout_port) logger.info('Start test for DUT port {} and fanout port {}'.format(dut_port, fanout_port)) - success = fanout.set_auto_negotiation_mode(fanout_port, True) - pytest_require(success, 'Failed to set autoneg mode on fanout. Fanout: {}, port: {}'.format(fanout, fanout_port)) - - # Advertise all supported speeds in fanout port - success = fanout.set_speed(fanout_port, None) - pytest_require( - success, - 'Failed to advertise all speeds on fanout. 
Fanout: {}, port: {}'.format(fanout, fanout_port) - ) - - if dut_all_speeds_option == SPEEDS_BY_LITERAL: - all_speeds = 'all' - else: - all_speeds = ','.join(duthost.get_supported_speeds(portname)) - - duthost.shell('config interface autoneg {} enabled'.format(dut_port)) - duthost.shell('config interface advertised-speeds {} {}'.format(dut_port, all_speeds)) - + with shutdown_port_on_duthost(duthost, dut_port): + success = fanout.set_auto_negotiation_mode(fanout_port, True) + pytest_require(success, 'Failed to set autoneg mode on fanout. Fanout: {}, port: {}'.format(fanout, + fanout_port)) + + # Advertise all supported speeds in fanout port + success = fanout.set_speed(fanout_port, None) + pytest_require( + success, + 'Failed to advertise all speeds on fanout. Fanout: {}, port: {}'.format(fanout, fanout_port) + ) + + if dut_all_speeds_option == SPEEDS_BY_LITERAL: + all_speeds = 'all' + else: + all_speeds = ','.join(duthost.get_supported_speeds(portname)) + + duthost.shell('config interface autoneg {} enabled'.format(dut_port)) + duthost.shell('config interface advertised-speeds {} {}'.format(dut_port, all_speeds)) logger.info('Wait until all ports are up') wait_result = wait_until( ALL_PORT_WAIT_TIME, @@ -224,15 +236,16 @@ def test_auto_negotiation_dut_advertises_each_speed(enum_speed_per_dutport_fixtu ) logger.info('Start test for DUT port {} and fanout port {}'.format(dut_port, fanout_port)) - success = fanout.set_auto_negotiation_mode(fanout_port, True) - pytest_require(success, 'Failed to set port autoneg on fanout port {}'.format(fanout_port)) + with shutdown_port_on_duthost(duthost, dut_port): + success = fanout.set_auto_negotiation_mode(fanout_port, True) + pytest_require(success, 'Failed to set port autoneg on fanout port {}'.format(fanout_port)) - # Advertise all supported speeds in fanout port - success = fanout.set_speed(fanout_port, None) - pytest_require(success, 'Failed to advertise all speeds on fanout port {}'.format(fanout_port)) + # Advertise all 
supported speeds in fanout port + success = fanout.set_speed(fanout_port, None) + pytest_require(success, 'Failed to advertise all speeds on fanout port {}'.format(fanout_port)) - duthost.shell('config interface autoneg {} enabled'.format(dut_port)) - duthost.shell('config interface advertised-speeds {} {}'.format(dut_port, speed)) + duthost.shell('config interface autoneg {} enabled'.format(dut_port)) + duthost.shell('config interface advertised-speeds {} {}'.format(dut_port, speed)) logger.info('Wait until the port status is up, expected speed: {}'.format(speed)) wait_result = wait_until( SINGLE_PORT_WAIT_TIME, @@ -261,25 +274,25 @@ def test_auto_negotiation_fanout_advertises_each_speed(enum_speed_per_dutport_fi duthost, dut_port, fanout, fanout_port = all_ports_by_dut[dutname][portname] logger.info('Start test for DUT port {} and fanout port {}'.format(dut_port, fanout_port)) - - if dut_all_speeds_option == SPEEDS_BY_LITERAL: - dut_advertised_speeds = 'all' - else: - dut_advertised_speeds = ','.join(duthost.get_supported_speeds(portname)) - - speed = enum_speed_per_dutport_fixture['speed'] - pytest_require( - is_sfp_speed_supported(duthost, portname, speed), - 'Speed {} is not supported for given port/SFP'.format(speed) - ) - - duthost.shell('config interface autoneg {} enabled'.format(dut_port)) - duthost.shell('config interface advertised-speeds {} {}'.format(dut_port, dut_advertised_speeds)) - - success = fanout.set_auto_negotiation_mode(fanout_port, True) - pytest_require(success, 'Failed to set port autoneg on fanout port {}'.format(fanout_port)) - success = fanout.set_speed(fanout_port, speed) - pytest_require(success, 'Failed to advertised speeds on fanout port {}, speed {}'.format(fanout_port, speed)) + with shutdown_port_on_duthost(duthost, dut_port): + if dut_all_speeds_option == SPEEDS_BY_LITERAL: + dut_advertised_speeds = 'all' + else: + dut_advertised_speeds = ','.join(duthost.get_supported_speeds(portname)) + + speed = 
enum_speed_per_dutport_fixture['speed'] + pytest_require( + is_sfp_speed_supported(duthost, portname, speed), + 'Speed {} is not supported for given port/SFP'.format(speed) + ) + + duthost.shell('config interface autoneg {} enabled'.format(dut_port)) + duthost.shell('config interface advertised-speeds {} {}'.format(dut_port, dut_advertised_speeds)) + + success = fanout.set_auto_negotiation_mode(fanout_port, True) + pytest_require(success, 'Failed to set port autoneg on fanout port {}'.format(fanout_port)) + success = fanout.set_speed(fanout_port, speed) + pytest_require(success, 'Failed to advertised speeds on fanout port {}, speed {}'.format(fanout_port, speed)) logger.info('Wait until the port status is up, expected speed: {}'.format(speed)) wait_result = wait_until( @@ -327,19 +340,19 @@ def test_force_speed(enum_speed_per_dutport_fixture): logger.info('Start test for DUT port {} and fanout port {}'.format(dut_port, fanout_port)) # Disable auto negotiation on fanout port - success = fanout.set_auto_negotiation_mode(fanout_port, False) - pytest_require(success, 'Failed to set port autoneg on fanout port {}'.format(fanout_port)) + with shutdown_port_on_duthost(duthost, dut_port): + success = fanout.set_auto_negotiation_mode(fanout_port, False) + pytest_require(success, 'Failed to set port autoneg on fanout port {}'.format(fanout_port)) - success = fanout.set_speed(fanout_port, speed) - pytest_require(success, 'Failed to speed on fanout port {}, speed {}'.format(fanout_port, speed)) + success = fanout.set_speed(fanout_port, speed) + pytest_require(success, 'Failed to speed on fanout port {}, speed {}'.format(fanout_port, speed)) - duthost.shell('config interface autoneg {} disabled'.format(dut_port)) - duthost.shell('config interface speed {} {}'.format(dut_port, speed)) - logger.info('Wait until the port status is up, expected speed: {}'.format(speed)) - - duthost.set_port_fec(dut_port, fec_mode) - fanout.set_port_fec(fanout_port, fec_mode) + duthost.shell('config 
interface autoneg {} disabled'.format(dut_port)) + duthost.shell('config interface speed {} {}'.format(dut_port, speed)) + duthost.set_port_fec(dut_port, fec_mode) + fanout.set_port_fec(fanout_port, fec_mode) + logger.info('Wait until the port status is up, expected speed: {}'.format(speed)) wait_result = wait_until( SINGLE_PORT_WAIT_TIME, PORT_STATUS_CHECK_INTERVAL, From 40e10c44715b3e09e6f6a4a45137087e5a129a32 Mon Sep 17 00:00:00 2001 From: Cong Hou <97947969+congh-nvidia@users.noreply.github.com> Date: Wed, 11 Oct 2023 00:58:20 +0800 Subject: [PATCH 04/10] Test plan for feature Generic Hash (#7524) Test plan for the new feature Generic Hash. The HLD of Generic Hash: sonic-net/SONiC#1101 The test implementation is not included in this PR, will open another PR. --- docs/testplan/Generic-Hash-test-plan.md | 337 ++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 docs/testplan/Generic-Hash-test-plan.md diff --git a/docs/testplan/Generic-Hash-test-plan.md b/docs/testplan/Generic-Hash-test-plan.md new file mode 100644 index 0000000000..2d8709b530 --- /dev/null +++ b/docs/testplan/Generic-Hash-test-plan.md @@ -0,0 +1,337 @@ +# Generic Hash Test Plan + +## Related documents + +| **Document Name** | **Link** | +|-------------------|----------| +| SONiC Generic Hash | [[https://github.com/sonic-net/SONiC/doc/hash/hash-design.md](https://github.com/sonic-net/SONiC/blob/master/doc/hash/hash-design.md)]| + + +## 1. Overview +The hashing algorithm is used to make traffic-forwarding decisions for traffic exiting the switch. +It makes hashing decisions based on values in various packet fields, as well as on the hash seed value. +The packet fields used by the hashing algorithm varies by the configuration on the switch. + +For ECMP, the hashing algorithm determines how incoming traffic is forwarded to the next-hop device. 
+For LAG, the hashing algorithm determines how traffic is placed onto the LAG member links to manage +bandwidth by evenly load-balancing traffic across the outgoing links. + +Generic Hash is a feature which allows the user to configure which hash fields are supposed to be used by the hashing algorithm by providing global switch hash configuration for ECMP and LAG. + +The sonic-mgmt generic hash tests validate whether the hash configurations can be applied successfully and the hash behaviour is as expected. + +## 2. Requirements + +### 2.1 The Generic Hash feature supports the following functionality: +1. Ethernet packet hashing configuration with inner/outer IP frames +2. Global switch hash configuration for ECMP and LAG +3. Warm/Fast reboot + +### 2.2 This feature will support the following commands: + +1. config: set switch hash global configuration +2. show: display switch hash global configuration or capabilities + +### 2.3 This feature will provide error handling for the following situations: + +#### 2.3.1 Frontend +**This feature will provide error handling for the following situations:** +1. Invalid parameter value +#### 2.3.2 Backend +**This feature will provide error handling for the following situations:** +1. Missing parameters +2. Invalid parameter value +3. Parameter removal +4. Configuration removal + +## 3. Scope + +The test is to verify the hash configuration can be added/updated by the generic hash, and the ECMP and lag hash behavior will change according to the generic hash configurations.
+ +### 3.1 Scale / Performance + +No scale or performance test related + +### 3.2 CLI commands + +#### 3.2.1 Config +The following command can be used to configure generic hash: +``` +config +|--- switch-hash + |--- global + |--- ecmp-hash ARGS + |--- lag-hash ARGS +``` + +Examples: +The following command updates switch hash global: +``` +config switch-hash global ecmp-hash \ +'DST_MAC' \ +'SRC_MAC' \ +'ETHERTYPE' \ +'IP_PROTOCOL' \ +'DST_IP' \ +'SRC_IP' \ +'L4_DST_PORT' \ +'L4_SRC_PORT' \ +'INNER_DST_MAC' \ +'INNER_SRC_MAC' \ +'INNER_ETHERTYPE' \ +'INNER_IP_PROTOCOL' \ +'INNER_DST_IP' \ +'INNER_SRC_IP' \ +'INNER_L4_DST_PORT' \ +'INNER_L4_SRC_PORT' +``` +``` +config switch-hash global lag-hash \ +'DST_MAC' \ +'SRC_MAC' \ +'ETHERTYPE' \ +'IP_PROTOCOL' \ +'DST_IP' \ +'SRC_IP' \ +'L4_DST_PORT' \ +'L4_SRC_PORT' \ +'INNER_DST_MAC' \ +'INNER_SRC_MAC' \ +'INNER_ETHERTYPE' \ +'INNER_IP_PROTOCOL' \ +'INNER_DST_IP' \ +'INNER_SRC_IP' \ +'INNER_L4_DST_PORT' \ +'INNER_L4_SRC_PORT' +``` + +#### 3.2.2 Show +The following command shows switch hash global configuration: +``` +show +|--- switch-hash + |--- global + |--- capabilities +``` + +Example: +**The following command shows switch hash global configuration:** +```bash +root@sonic:/home/admin# show switch-hash global +ECMP HASH LAG HASH +----------------- ----------------- +DST_MAC DST_MAC +SRC_MAC SRC_MAC +ETHERTYPE ETHERTYPE +IP_PROTOCOL IP_PROTOCOL +DST_IP DST_IP +SRC_IP SRC_IP +L4_DST_PORT L4_DST_PORT +L4_SRC_PORT L4_SRC_PORT +INNER_DST_MAC INNER_DST_MAC +INNER_SRC_MAC INNER_SRC_MAC +INNER_ETHERTYPE INNER_ETHERTYPE +INNER_IP_PROTOCOL INNER_IP_PROTOCOL +INNER_DST_IP INNER_DST_IP +INNER_SRC_IP INNER_SRC_IP +INNER_L4_DST_PORT INNER_L4_DST_PORT +INNER_L4_SRC_PORT INNER_L4_SRC_PORT +``` + +**The following command shows switch hash capabilities:** +```bash +root@sonic:/home/admin# show switch-hash capabilities +ECMP HASH LAG HASH +----------------- ----------------- +IN_PORT IN_PORT +DST_MAC DST_MAC +SRC_MAC SRC_MAC +ETHERTYPE 
ETHERTYPE +VLAN_ID VLAN_ID +IP_PROTOCOL IP_PROTOCOL +DST_IP DST_IP +SRC_IP SRC_IP +L4_DST_PORT L4_DST_PORT +L4_SRC_PORT L4_SRC_PORT +INNER_DST_MAC INNER_DST_MAC +INNER_SRC_MAC INNER_SRC_MAC +INNER_ETHERTYPE INNER_ETHERTYPE +INNER_IP_PROTOCOL INNER_IP_PROTOCOL +INNER_DST_IP INNER_DST_IP +INNER_SRC_IP INNER_SRC_IP +INNER_L4_DST_PORT INNER_L4_DST_PORT +INNER_L4_SRC_PORT INNER_L4_SRC_PORT +``` + +### 3.3 DUT related configuration in config_db + +``` +{ + "SWITCH_HASH": { + "GLOBAL": { + "ecmp_hash": [ + "DST_MAC", + "SRC_MAC", + "ETHERTYPE", + "IP_PROTOCOL", + "DST_IP", + "SRC_IP", + "L4_DST_PORT", + "L4_SRC_PORT", + "INNER_DST_MAC", + "INNER_SRC_MAC", + "INNER_ETHERTYPE", + "INNER_IP_PROTOCOL", + "INNER_DST_IP", + "INNER_SRC_IP", + "INNER_L4_DST_PORT", + "INNER_L4_SRC_PORT" + ], + "lag_hash": [ + "DST_MAC", + "SRC_MAC", + "ETHERTYPE", + "IP_PROTOCOL", + "DST_IP", + "SRC_IP", + "L4_DST_PORT", + "L4_SRC_PORT", + "INNER_DST_MAC", + "INNER_SRC_MAC", + "INNER_ETHERTYPE", + "INNER_IP_PROTOCOL", + "INNER_DST_IP", + "INNER_SRC_IP", + "INNER_L4_DST_PORT", + "INNER_L4_SRC_PORT" + ] + } + } +} +``` +### 3.4 Supported topology +The test should support t0 and t1 topologies. + +## 4. 
Test cases + +| **No.** | **Test Case** | **Test Purpose** | +|----------|-------------------|----------| +| 1 | test_hash_capability | Verify the “show switch-hash capabilities” gets the supported hash fields.| +| 2 | test_ecmp_hash | Verify the basic functionality of ecmp hash with a single hash field| +| 3 | test_lag_hash | Verify the basic functionality of lag hash with a single hash field| +| 4 | test_ecmp_and_lag_hash | Verify the hash functionality with all ecmp and lag hash fields configured| +| 5 | test_nexthop_flap | Verify the ecmp hash functionality when there is nexthop flap| +| 6 | test_lag_member_flap | Verify the lag hash functionality when there is lag member flap| +| 7 | test_lag_member_remove_add| Verify the lag hash functionality after a lag member is removed and added back to a portchannel| +| 8 | test_reboot | Verify there is no hash configuration inconsistency before and after reload/reboot| +| 9 | test_backend_error_messages | Verify there are backend errors in syslog when the hash config is removed or updated with invalid values via redis cli| + +### Notes: + 1. The tested hash field in each test case is randomly selected from a pre-defined field list per asic type. Currently these fields are tested as default: 'IN_PORT', 'SRC_MAC', 'DST_MAC', 'ETHERTYPE', 'VLAN_ID', 'IP_PROTOCOL', 'SRC_IP', 'DST_IP', 'L4_SRC_PORT', 'L4_DST_PORT', 'INNER_SRC_IP', 'INNER_DST_IP'. + 2. DST_MAC, ETHERTYPE, VLAN_ID fields are only tested in lag hash test cases, because L2 traffic is needed to test these fields, and there is no ecmp hash when the traffic is forwarded in L2. + 3. IPv4 and IPv6 are covered in the test, but the versions (including the inner version when testing the inner fields) are randomly selected in the test cases. + 4. For the inner fields, three types of encapsulations are covered: IPinIP, VxLAN and NVGRE. For the VxLAN packet, the default port 4789 and a custom port 13330 are covered in the test. + 5. 
For the reboot test, reboot type is randomly selected from config reload, cold, warm and fast reboot. + 6. The random selections of hash fields, ip versions, encapsulation types and reboot types can be controlled by pytest options. The user is able to set each of the option as 'random', 'all', or a specific value. + +### Test cases #1 - test_hash_capability +1. Get the supported hash fields via cli "show switch-hash capabilities" +2. Check the fields are as expected. + +### Test cases #2 - test_ecmp_hash +1. The test is using the default links and routes in a t0/t1 testbed. +2. Randomly select a hash field and configure it to the ecmp hash list via cli "config switch-hash global ecmp-hash". +3. Configure the lag hash list to exclude the selected field to verify the lag hash configuration does not affect the hash result. +4. Send traffic with changing values of the field under test from a downlink ptf port to uplink destination via multiple nexthops. +5. Check the traffic is balanced over the nexthops. +6. If the uplinks are portchannels with multiple members, check the traffic is not balanced over the members. + +### Test cases #3 - test_lag_hash +1. The test is using the default links and routes in a t0/t1 testbed, and only runs on setups which have multi-member portchannel uplinks. +2. Randomly select a hash field and configure it to the lag hash list via cli "config switch-hash global lag-hash". +3. Configure the ecmp hash list to exclude the selected field to verify the ecmp hash configuration does not affect the hash result. +4. If the hash field is DST_MAC, ETHERTYPE or VLAN_ID, take the steps 5-7, otherwise skip them. +5. Choose one downlink interface and one uplink interface, remove all ip/ipv6 addresses on them. +6. Remove the downlink interface from the existing vlan if it is t0 topology. +7. For the DST_MAC, ETHERTYPE fields, add the chosen interfaces to a same vlan; For VLAN_ID field, add the interfaces to multiple vlans. +8. 
Send traffic with changing values of the field under test from a downlink ptf port to uplink destination via the portchannels. +9. Check the traffic is forwarded through only one portchannel and is balanced over the members. + +### Test cases #4 - test_ecmp_and_lag_hash +1. The test is using the default links and routes in a t0/t1 testbed. +2. Configure all the supported hash fields for the ecmp and lag hash. +3. Randomly select one hash field to test. +4. Send traffic with changing values of the field under test from a downlink ptf port to uplink destination. +5. Check the traffic is balanced over all the uplink physical ports. + +### Test cases #5 - test_nexthop_flap +1. The test is using the default links and routes in a t0/t1 testbed. +2. Configure all the supported hash fields for the ecmp and lag hash. +3. Randomly select one hash field to test. +4. Send traffic with changing values of the field under test from a downlink ptf port to uplink destination. +5. Check the traffic is balanced over all the uplink ports. +6. Randomly shutdown 1 nexthop interface. +7. Send the traffic again. +8. Check the traffic is balanced over all remaining uplink ports with no packet loss. +9. Recover the interface and do shutdown/startup on the interface 3 more times. +10. Send the traffic again. +11. Check the traffic is balanced over all uplink ports with no packet loss. + +### Test cases #6 - test_lag_member_flap +1. The test is using the default links and routes in a t0/t1 testbed, and only runs on setups which have multi-member portchannel uplinks. +2. Configure all the supported hash fields for the ecmp and lag hash. +3. Randomly select one hash field to test. +4. If the hash field is DST_MAC, ETHERTYPE or VLAN_ID, take the steps 5-7, otherwise skip them. +5. Choose one downlink interface and one uplink interface, remove all ip/ipv6 addresses on them. +6. Remove the downlink interface from the existing vlan if it is t0 topology. +7. 
For the DST_MAC, ETHERTYPE fields, add the chosen interfaces to a same vlan; For VLAN_ID field, add the interfaces to multiple vlans. +8. Send traffic with changing values of the field under test from a downlink ptf port to uplink destination. +9. Check the traffic is balanced over all the uplink ports. +10. Randomly shutdown 1 member port in all uplink portchannels. +11. Send the traffic again. +12. Check the traffic is balanced over all remaining uplink ports with no packet loss. +13. Recover the members and do shutdown/startup on them 3 more times. +14. Send the traffic again. +15. Check the traffic is balanced over all uplink ports with no packet loss. + +### Test cases #7 - test_lag_member_remove_add +1. The test is using the default links and routes in a t0/t1 testbed, and only runs on setups which have multi-member portchannel uplinks. +2. Configure all the supported hash fields for the ecmp and lag hash. +3. Randomly select one hash field to test. +4. If the hash field is DST_MAC, ETHERTYPE or VLAN_ID, take the steps 5-7, otherwise skip them. +5. Choose one downlink interface and one uplink interface, remove all ip/ipv6 addresses on them. +6. Remove the downlink interface from the existing vlan if it is t0 topology. +7. For the DST_MAC, ETHERTYPE fields, add the chosen interfaces to a same vlan; For VLAN_ID field, add the interfaces to multiple vlans. +8. Send traffic with changing values of the field under test from a downlink ptf port to uplink destination. +9. Check the traffic is balanced over all the uplink ports. +10. Randomly remove 1 member port from each uplink portchannels. +11. Add the member ports back to the portchannels. +12. Send the traffic again. +13. Check the traffic is balanced over all uplink ports with no packet loss. + +### Test cases #8 - test_reboot +1. The test is using the default links and routes in a t0/t1 testbed. +2. Configure all the supported hash fields for the ecmp and lag hash. +3. Randomly select one hash field to test. 
+4. Randomly select a reboot type from reload or fast/warm/cold reboot, if reload or cold reboot, save the configuration before the reload/reboot. +5. Send traffic with changing values of the field under test from a downlink ptf port to uplink destination. +6. Check the traffic is balanced over all the uplink ports. +7. Do the reload/reboot. +8. After the reload/reboot, check the generic hash config via cli, it should keep the same as it was before the reload/reboot. +9. Send traffic again. +10. Check the traffic is balanced over all the uplink ports. + +### Test cases #9 - test_backend_error_messages +1. Config ecmp and lag hash via cli. +2. Remove the ecmp hash key via redis cli. +3. Check there is a warning printed in the syslog. +4. Remove the lag hash key via redis cli. +5. Check there is a warning printed in the syslog. +6. Re-config the ecmp and lag hash via cli. +7. Update the ecmp hash fields with an invalid value via redis cli. +8. Check there is a warning printed in the syslog. +9. Update the lag hash fields with an invalid value via redis cli. +10. Check there is a warning printed in the syslog. +11. Re-config the ecmp and lag hash via cli. +12. Remove the generic hash key via redis cli. +13. Check there is a warning printed in the syslog. 
From 7a0311afe9303a4192160464c5bcc4ab33411c26 Mon Sep 17 00:00:00 2001 From: isabelmsft <67024108+isabelmsft@users.noreply.github.com> Date: Tue, 10 Oct 2023 16:01:59 -0700 Subject: [PATCH 05/10] [GCU] Skip update port index test (#10242) ## Description of PR Summary: ADO 25002053 ### Type of change Skip GCU update port index test, as this is not a production scenario - [ ] Bug fix - [ ] Testbed and Framework(new/improvement) - [x] Test case(new/improvement) --- tests/generic_config_updater/test_eth_interface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/generic_config_updater/test_eth_interface.py b/tests/generic_config_updater/test_eth_interface.py index 5bdade097e..d5c73d7695 100644 --- a/tests/generic_config_updater/test_eth_interface.py +++ b/tests/generic_config_updater/test_eth_interface.py @@ -223,6 +223,7 @@ def test_replace_fec(duthost, ensure_dut_readiness, fec): delete_tmpfile(duthost, tmpfile) +@pytest.mark.skip(reason="Bypass as this is not a production scenario") def test_update_invalid_index(duthost, ensure_dut_readiness): json_patch = [ { @@ -242,6 +243,7 @@ def test_update_invalid_index(duthost, ensure_dut_readiness): delete_tmpfile(duthost, tmpfile) +@pytest.mark.skip(reason="Bypass as this is not a production scenario") def test_update_valid_index(duthost, ensure_dut_readiness): output = duthost.shell('sonic-db-cli CONFIG_DB keys "PORT|"\\*')["stdout"] interfaces = {} # to be filled with two interfaces mapped to their indeces From e69c432e8df1587963896d29909d1a00934a81c5 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Wed, 11 Oct 2023 13:04:42 +0800 Subject: [PATCH 06/10] Improve deployment performance (#10094) What is the motivation for this PR? If we run "testbed-cli.sh add-topo" to deploy testbed, playbook ansible/roles/vm_set/tasks/main.yml may be called multiple times. This playbook contains some heavy tasks for configuring the test server, for example, installing packages, configuring sysctl conf, etc. 
The consequence is that these heavy tasks will be executed multiple times. This is a waste of time. What's more, when "testbed-cli.sh add-topo" is executed again for a testbed on the same server, these tasks will still be executed multiple times even if the test server already has the required packages configured and sysconf set to proper values. How did you do it? This PR extracted the server configuring tasks into a dedicated playbook. It will only be called once during add-topo. Another improvement is to add a file flag /var/run/sonic/server_ready. When the file is present, the server configuring tasks will be skipped. The last step is to check ansible version. If ansible version >= 2.12, then use "lock_timeout" argument for the apt modules to wait for the apt db lock. After the ansible version of the sonic-mgmt docker image is upgraded, this argument can avoid potential apt db locking conflicts while running multiple testbed deployment jobs on the same server. Before ansible is upgraded, the chance of conflicts still can be significantly reduced with the new server_ready file flag. How did you verify/test it? Tested add-topo/remove-topo on both physical and KVM testbed. 
Tested on a physical testbed for t0 topology: With this improvement Without this improvement add-topo 6m7s 9m1s remove-topo 3m10s 2m45s --- .../roles/vm_set/tasks/configure_server.yml | 234 ++++++++++++++++++ ansible/roles/vm_set/tasks/main.yml | 128 +--------- ansible/testbed_add_vm_topology.yml | 1 + 3 files changed, 238 insertions(+), 125 deletions(-) create mode 100644 ansible/roles/vm_set/tasks/configure_server.yml diff --git a/ansible/roles/vm_set/tasks/configure_server.yml b/ansible/roles/vm_set/tasks/configure_server.yml new file mode 100644 index 0000000000..ba922ae3cf --- /dev/null +++ b/ansible/roles/vm_set/tasks/configure_server.yml @@ -0,0 +1,234 @@ +- name: Check flag file to see if server is configured + stat: + path: /var/run/sonic/server_ready + register: server_ready + +- name: Check if force configure server is set + set_fact: + force_configure_server: false + when: force_configure_server is not defined + +- block: + - name: Firstly clear the server ready flag file + file: + path: /var/run/sonic/server_ready + state: absent + become: yes + + - name: Set default apt_lock_timeout + set_fact: + support_apt_lock_timeout: false + + # Ansible apt module supports argument "lock_timeout" since ansible core version 2.12 + # - https://docs.ansible.com/ansible/latest/collections/ansible/builtin/apt_module.html#parameter-lock_timeout + # We can take advantage of this argument to avoid potential apt lock conflicts while running add-topo in parallel + # for multiple testbeds on the same server. + # The problem is that ansible version in sonic-mgmt docker image is not upgraded to 2.12 yet. So, we need to + # check ansible version and set the support_apt_lock_timeout flag accordingly. Then we can use this flag to + # decide whether to use lock_timeout argument in apt module or not. In the future, when ansible version in + # sonic-mgmt docker image is upgraded to 2.12, we can benefit from the lock_timeout argument directly. 
+ - name: Support apt lock_timeout or not + set_fact: + support_apt_lock_timeout: true + when: + - ansible_version.major|int >= 2 + - ansible_version.minor|int >= 12 + + - name: Check if kernel upgrade needed + set_fact: + kernel_upgrade_needed: true + when: + - host_distribution.stdout == "Ubuntu" + - host_distribution_version.stdout == "17.04" + - host_kernel.stdout.find('4.10.0') != -1 + - "{{ host_kernel.stdout | regex_replace('4.10.0-([0-9]+)-.*', '\\1') | int < 25 }}" + + - block: + - debug: msg="{{ host_kernel.stdout }}" + + - name: Upgrade kernel package + apt: pkg={{ item }} state=latest + become: yes + with_items: + - linux-image-generic + - linux-image-extra-virtual + when: not support_apt_lock_timeout|bool + + - name: Upgrade kernel package + apt: pkg={{ item }} state=latest lock_timeout=600 + become: yes + with_items: + - linux-image-generic + - linux-image-extra-virtual + when: support_apt_lock_timeout|bool + + - name: Prompt for rebooting + fail: + msg: "Kernel upgraded, need to reboot!" 
+ when: + - kernel_upgrade_needed is defined + - package_installation|bool + + - block: + - name: Install necessary packages + apt: pkg={{ item }} update_cache=yes cache_valid_time=86400 + become: yes + with_items: + - ifupdown + - qemu + - openvswitch-switch + - net-tools + - bridge-utils + - util-linux + - iproute2 + - vlan + - apt-transport-https + - ca-certificates + - curl + - software-properties-common + - libvirt-clients + when: not support_apt_lock_timeout|bool + + - name: Install necessary packages + apt: pkg={{ item }} update_cache=yes cache_valid_time=86400 lock_timeout=600 + become: yes + with_items: + - ifupdown + - qemu + - openvswitch-switch + - net-tools + - bridge-utils + - util-linux + - iproute2 + - vlan + - apt-transport-https + - ca-certificates + - curl + - software-properties-common + - libvirt-clients + when: support_apt_lock_timeout|bool + + - name: Install necessary packages + apt: + pkg: + - python + - libvirt-bin + - python-libvirt + - python-pip + become: yes + when: + - host_distribution_version.stdout == "18.04" + - not support_apt_lock_timeout|bool + + - name: Install necessary packages + apt: + pkg: + - python + - libvirt-bin + - python-libvirt + - python-pip + lock_timeout: 600 + become: yes + when: + - host_distribution_version.stdout == "18.04" + - support_apt_lock_timeout|bool + + - name: Install necessary packages + apt: + pkg: + - python3-libvirt + - python3-pip + - libvirt-daemon-system + - qemu-system-x86 + become: yes + when: + - host_distribution_version.stdout == "20.04" or host_distribution_version.stdout == "22.04" + - not support_apt_lock_timeout|bool + + - name: Install necessary packages + apt: + pkg: + - python3-libvirt + - python3-pip + - libvirt-daemon-system + - qemu-system-x86 + lock_timeout: 600 + become: yes + when: + - host_distribution_version.stdout == "20.04" or host_distribution_version.stdout == "22.04" + - support_apt_lock_timeout|bool + + when: package_installation|bool + + - name: remove old python 
packages + pip: name=docker-py state=absent executable={{ pip_executable }} + environment: "{{ proxy_env | default({}) }}" + ignore_errors: yes + + - include_tasks: docker.yml + when: package_installation|bool + + - name: Ensure {{ ansible_user }} in docker,sudo group + user: + name: "{{ ansible_user }}" + append: yes + groups: docker,sudo + become: yes + + - name: Ensure {{ ansible_user }} in libvirt group + user: + name: "{{ ansible_user }}" + append: yes + groups: libvirt + become: yes + when: host_distribution_version.stdout == "20.04" or host_distribution_version.stdout == "22.04" + + - name: Install br_netfilter kernel module + become: yes + modprobe: name=br_netfilter state=present + + - name: Set sysctl bridge parameters for testbed + sysctl: + name: "{{ item }}" + value: "0" + sysctl_set: yes + become: yes + with_items: + - net.bridge.bridge-nf-call-arptables + - net.bridge.bridge-nf-call-ip6tables + - net.bridge.bridge-nf-call-iptables + + - name: Set sysctl RCVBUF max parameter for testbed + sysctl: + name: "net.core.rmem_max" + value: "509430500" + sysctl_set: yes + become: yes + + - name: Set sysctl RCVBUF default parameter for testbed + sysctl: + name: "net.core.rmem_default" + value: "31457280" + sysctl_set: yes + become: yes + + - name: Setup external front port + include_tasks: external_port.yml + when: external_port is defined + + - name: Setup internal management network + include_tasks: internal_mgmt_network.yml + when: internal_mgmt_network is defined and internal_mgmt_network == True + + - name: Prepare parent directory + shell: + cmd: "mkdir -p /var/run/sonic" + become: yes + + - name: Set the server ready flag file + file: + path: /var/run/sonic/server_ready + state: touch + become: yes + + when: not server_ready.stat.exists or force_configure_server|bool diff --git a/ansible/roles/vm_set/tasks/main.yml b/ansible/roles/vm_set/tasks/main.yml index 8f03fddbe6..108c4d6f84 100644 --- a/ansible/roles/vm_set/tasks/main.yml +++ 
b/ansible/roles/vm_set/tasks/main.yml @@ -35,72 +35,6 @@ register: host_kernel changed_when: False -- name: Check if kernel upgrade needed - set_fact: - kernel_upgrade_needed: true - when: - - host_distribution.stdout == "Ubuntu" - - host_distribution_version.stdout == "17.04" - - host_kernel.stdout.find('4.10.0') != -1 - - "{{ host_kernel.stdout | regex_replace('4.10.0-([0-9]+)-.*', '\\1') | int < 25 }}" - -- block: - - debug: msg="{{ host_kernel.stdout }}" - - - name: Upgrade kernel package - apt: pkg={{ item }} state=latest - become: yes - with_items: - - linux-image-generic - - linux-image-extra-virtual - - - name: Prompt for rebooting - fail: - msg: "Kernel upgraded, need to reboot!" - when: - - kernel_upgrade_needed is defined - - package_installation|bool - -- block: - - name: Install necessary packages - apt: pkg={{ item }} update_cache=yes cache_valid_time=86400 - become: yes - with_items: - - ifupdown - - qemu - - openvswitch-switch - - net-tools - - bridge-utils - - util-linux - - iproute2 - - vlan - - apt-transport-https - - ca-certificates - - curl - - software-properties-common - - libvirt-clients - - - name: Install necessary packages - apt: - pkg: - - python - - libvirt-bin - - python-libvirt - - python-pip - become: yes - when: host_distribution_version.stdout == "18.04" - - - name: Install necessary packages - apt: - pkg: - - python3-libvirt - - python3-pip - - libvirt-daemon-system - - qemu-system-x86 - become: yes - when: host_distribution_version.stdout == "20.04" or host_distribution_version.stdout == "22.04" - when: package_installation|bool - - name: Get default pip_executable set_fact: pip_executable: pip @@ -111,65 +45,9 @@ pip_executable: pip3 when: pip_executable is not defined and (host_distribution_version.stdout == "20.04" or host_distribution_version.stdout == "22.04") -- name: remove old python packages - pip: name=docker-py state=absent executable={{ pip_executable }} - environment: "{{ proxy_env | default({}) }}" - ignore_errors: 
yes - -- include_tasks: docker.yml - when: package_installation|bool - -- name: Ensure {{ ansible_user }} in docker,sudo group - user: - name: "{{ ansible_user }}" - append: yes - groups: docker,sudo - become: yes - -- name: Ensure {{ ansible_user }} in libvirt group - user: - name: "{{ ansible_user }}" - append: yes - groups: libvirt - become: yes - when: host_distribution_version.stdout == "20.04" or host_distribution_version.stdout == "22.04" - -- name: Install br_netfilter kernel module - become: yes - modprobe: name=br_netfilter state=present - -- name: Set sysctl bridge parameters for testbed - sysctl: - name: "{{ item }}" - value: "0" - sysctl_set: yes - become: yes - with_items: - - net.bridge.bridge-nf-call-arptables - - net.bridge.bridge-nf-call-ip6tables - - net.bridge.bridge-nf-call-iptables - -- name: Set sysctl RCVBUF max parameter for testbed - sysctl: - name: "net.core.rmem_max" - value: "509430500" - sysctl_set: yes - become: yes - -- name: Set sysctl RCVBUF default parameter for testbed - sysctl: - name: "net.core.rmem_default" - value: "31457280" - sysctl_set: yes - become: yes - -- name: Setup external front port - include_tasks: external_port.yml - when: external_port is defined - -- name: Setup internal management network - include_tasks: internal_mgmt_network.yml - when: internal_mgmt_network is defined and internal_mgmt_network == True +- name: Configure server + include_tasks: configure_server.yml + when: action == 'configure_server' - block: - getent: diff --git a/ansible/testbed_add_vm_topology.yml b/ansible/testbed_add_vm_topology.yml index 3b3ca2ed92..d766ec01ba 100644 --- a/ansible/testbed_add_vm_topology.yml +++ b/ansible/testbed_add_vm_topology.yml @@ -104,6 +104,7 @@ when: duts_name.split(',')|length > 1 roles: + - { role: vm_set, action: 'configure_server' } - { role: vm_set, action: 'stop_sonic_vm', when force_stop_sonic_vm is defined } - { role: vm_set, action: 'start_sonic_vm' } - { role: vm_set, action: 'start_sid' } From 
2192c2f15781d45c9d794e13a96f0f270977102a Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Wed, 11 Oct 2023 15:03:50 +0800 Subject: [PATCH 07/10] Avoid running command in exited ptf docker container (#10286) While stopping PTF container, "ptf_control" module is executed to kill all processes in the PTF container. The original code checks if the PTF container's Pid exists before running command in the PTF container. Unfortunately, this check is not enough. PTF docker container in exited status still has Pid. This change improved the code for getting PTF container's Pid. When PTF container is not in "running" status, always return None for PTF container's Pid. Signed-off-by: Xin Wang --- ansible/roles/vm_set/library/ptf_control.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ansible/roles/vm_set/library/ptf_control.py b/ansible/roles/vm_set/library/ptf_control.py index 5e3d6bcbb5..366b6160fc 100644 --- a/ansible/roles/vm_set/library/ptf_control.py +++ b/ansible/roles/vm_set/library/ptf_control.py @@ -61,10 +61,12 @@ def get_pid(ctn_name): cli = docker.from_env() try: ctn = cli.containers.get(ctn_name) - except Exception: - return None + if ctn.status == 'running': + return ctn.attrs['State']['Pid'] + except Exception as e: + logging.debug("Failed to get pid for container %s: %s" % (ctn_name, str(e))) - return ctn.attrs['State']['Pid'] + return None def get_process_pids(self, process): cmd = 'docker exec -t {} bash -c "pgrep -f \'{}\'"'.format(self.ctn_name, process) From 32834f86380ce79bc3e73709e567b4ddb0b9289f Mon Sep 17 00:00:00 2001 From: Yaqiang Zhu Date: Wed, 11 Oct 2023 21:04:39 +0800 Subject: [PATCH 08/10] [m0][mx] Skip test_acl_outer_vlan for m0/mx (#10289) What is the motivation for this PR? There is no application scenario for test_acl_outer_vlan in m0, no need to run this case. How did you do it? Remove pytest mark to skip test_acl_outer_vlan for m0/mx. How did you verify/test it? Run tests. 
Signed-off-by: Yaqiang Zhu --- tests/acl/test_acl_outer_vlan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/acl/test_acl_outer_vlan.py b/tests/acl/test_acl_outer_vlan.py index 2051e0362f..34ea838bda 100644 --- a/tests/acl/test_acl_outer_vlan.py +++ b/tests/acl/test_acl_outer_vlan.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) pytestmark = [ - pytest.mark.topology('t0', 'm0', 'mx'), + pytest.mark.topology('t0'), pytest.mark.disable_loganalyzer, # Disable automatic loganalyzer, since we use it for the test ] From c76f35835c5efa4c8be97df329ad901c65762a07 Mon Sep 17 00:00:00 2001 From: wenyiz2021 <91497961+wenyiz2021@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:42:26 -0700 Subject: [PATCH 09/10] [test_chassisd] wait critical processes to come up in test setup (#10230) --- tests/platform_tests/daemon/test_chassisd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/platform_tests/daemon/test_chassisd.py b/tests/platform_tests/daemon/test_chassisd.py index 6d9711b642..f30cca331c 100644 --- a/tests/platform_tests/daemon/test_chassisd.py +++ b/tests/platform_tests/daemon/test_chassisd.py @@ -13,7 +13,7 @@ from tests.common.helpers.assertions import pytest_assert from tests.common.platform.daemon_utils import check_pmon_daemon_enable_status -from tests.common.platform.processes_utils import check_critical_processes +from tests.common.platform.processes_utils import check_critical_processes, wait_critical_processes from tests.common.utilities import compose_dict_from_cli, wait_until from collections import OrderedDict @@ -42,6 +42,7 @@ def setup(duthosts, enum_rand_one_per_hwsku_hostname): daemon_en_status = check_pmon_daemon_enable_status(duthost, daemon_name) if daemon_en_status is False: pytest.skip("{} is not enabled in {} {}".format(daemon_name, duthost.facts['platform'], duthost.os_version)) + wait_critical_processes(duthost) @pytest.fixture(scope="module", autouse=True) From 
39228d1a6b5411428a88394c78719e4b896fd8c2 Mon Sep 17 00:00:00 2001 From: Song Yuan <64041228+ysmanman@users.noreply.github.com> Date: Wed, 11 Oct 2023 11:43:41 -0700 Subject: [PATCH 10/10] Acl test is flaky in checking syslogs. Test failed even the log was there. (#10271) Add delay to make sure ACL is applied before checking syslog. --- tests/acl/test_acl.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/acl/test_acl.py b/tests/acl/test_acl.py index bfb36e9703..3d4b5928b2 100644 --- a/tests/acl/test_acl.py +++ b/tests/acl/test_acl.py @@ -532,6 +532,8 @@ def create_or_remove_acl_table(duthost, acl_table_config, setup, op, topo): logger.info("Removing ACL table \"{}\" in namespace {} on device {}" .format(acl_table_config["table_name"], namespace, duthost)) sonic_host_or_asic_inst.command("config acl remove table {}".format(acl_table_config["table_name"])) + # Give the dut some time for the ACL to be applied and LOG message generated + time.sleep(30) @pytest.fixture(scope="module") @@ -668,6 +670,8 @@ def acl_rules(self, duthosts, localhost, setup, acl_table, populate_vlan_arp_ent loganalyzer.ignore_regex = [r".*"] with loganalyzer: self.setup_rules(duthost, acl_table, ip_version) + # Give the dut some time for the ACL rules to be applied and LOG message generated + time.sleep(30) self.post_setup_hook(duthost, localhost, populate_vlan_arp_entries, tbinfo, conn_graph_facts)