Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mellanox] Support PSU power threshold checking #6288

Merged
4 changes: 2 additions & 2 deletions tests/common/platform/device_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_dut_psu_line_pattern(dut):
if "201811" in dut.os_version or "201911" in dut.os_version:
psu_line_pattern = re.compile(r"PSU\s+(\d)+\s+(OK|NOT OK|NOT PRESENT)")
elif dut.facts['platform'] == "x86_64-dellemc_z9332f_d1508-r0" or dut.facts['asic_type'] == "cisco-8000":
psu_line_pattern = re.compile(r"PSU\s+(\d+).*?(OK|NOT OK|NOT PRESENT)\s+(N/A)")
psu_line_pattern = re.compile(r"PSU\s+(\d+).*?(OK|NOT OK|NOT PRESENT|WARNING)\s+(N/A)")
else:
"""
Changed the pattern to match space (s+) and non-space (S+) only.
Expand All @@ -45,7 +45,7 @@ def get_dut_psu_line_pattern(dut):
PSU 2 N/A N/A 12.01 4.12 49.50 OK green

"""
psu_line_pattern = re.compile(r"PSU\s+(\d+).*?(OK|NOT OK|NOT PRESENT)\s+(green|amber|red|off)")
psu_line_pattern = re.compile(r"PSU\s+(\d+).*?(OK|NOT OK|NOT PRESENT|WARNING)\s+(green|amber|red|off)")
return psu_line_pattern


Expand Down
17 changes: 17 additions & 0 deletions tests/platform_tests/mellanox/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def pytest_addoption(parser):
    '''
    Register the command line options of the Mellanox specific tests.

    The options are attached to a dedicated option group obtained from the
    parser, named "Mellanox test suite options".

    Args:
        parser: pytest parser object

    Returns:
        None
    '''
    # Flag option: it is a plain switch (store_true) and takes no argument.
    mock_any_testbed_settings = {
        "action": "store_true",
        "help": "Mock on testbeds which do not support PSU power thresholds",
    }

    parser.getgroup("Mellanox test suite options").addoption(
        "--mock_any_testbed", **mock_any_testbed_settings
    )
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ class MockerHelper:
# LED related sys fs folder path.
LED_PATH = '/var/run/hw-management/led/'

# Config path
CONFIG_PATH = '/var/run/hw-management/config/'

# Power path
POWER_PATH = '/var/run/hw-management/power/'

# FAN number of DUT.
FAN_NUM = 0

Expand Down Expand Up @@ -190,21 +196,26 @@ def mock_led_value(self, file_path, value):
file_path = os.path.join(MockerHelper.LED_PATH, file_path)
self.mock_value(file_path, value)

def mock_value(self, file_path, value):
def mock_value(self, file_path, value, force=False):
"""
Unlink existing sys fs file and replace it with a new one. Write given value to the new file.
:param file_path: Sys fs file path.
:param value: Value to write to sys fs file.
:param force: Force mock even if the file does not exist.
:return:
"""
if file_path not in self.regular_file_list and file_path not in self.unlink_file_list:
out = self.dut.stat(path=file_path)
exist = True
if not out['stat']['exists']:
raise SysfsNotExistError('{} not exist'.format(file_path))
if out['stat']['islnk']:
if force:
exist = False
else:
raise SysfsNotExistError('{} not exist'.format(file_path))
if exist and out['stat']['islnk']:
self._unlink(file_path)
else:
self._cache_file_value(file_path)
self._cache_file_value(file_path, force)
self.dut.shell('echo \'{}\' > {}'.format(value, file_path))

def read_thermal_value(self, file_path):
Expand Down Expand Up @@ -241,7 +252,7 @@ def read_value(self, file_path):
except Exception as e:
assert 0, "Get content from %s failed, exception: %s" % (file_path, repr(e))

def _cache_file_value(self, file_path):
def _cache_file_value(self, file_path, may_nexist=False):
"""
Cache file value for regular file.
:param file_path: Regular file path.
Expand All @@ -252,7 +263,10 @@ def _cache_file_value(self, file_path):
value = output["stdout"]
self.regular_file_list[file_path] = value.strip()
except Exception as e:
assert 0, "Get content from %s failed, exception: %s" % (file_path, repr(e))
if may_nexist:
self.regular_file_list[file_path] = None
else:
assert 0, "Get content from %s failed, exception: %s" % (file_path, repr(e))

def _unlink(self, file_path):
"""
Expand Down Expand Up @@ -282,7 +296,10 @@ def deinit(self):
failed_recover_files = {}
for file_path, value in self.regular_file_list.items():
try:
self.dut.shell('echo \'{}\' > {}'.format(value, file_path))
if value is None:
self.dut.shell('rm -f {}'.format(file_path))
else:
self.dut.shell('echo \'{}\' > {}'.format(value, file_path))
except Exception as e:
# Catch any exception for later retry
failed_recover_files[file_path] = value
Expand Down Expand Up @@ -1192,3 +1209,67 @@ def mock_cpu_pack_temperature(self, temperature):

def get_cpu_cooling_state(self):
return int(self.mock_helper.read_value(self.CPU_COOLING_STATE_FILE))


@mocker('PsuPowerThresholdMocker')
class PsuPowerThresholdMocker(object):
    """
    Mocker for the PSU power threshold related hw-management files.

    All reads and writes are delegated to a MockerHelper created for the DUT.
    """

    # Ambient temperature readings.
    PORT_AMBIENT_TEMP = '/var/run/hw-management/thermal/port_amb'
    FAN_AMBIENT_TEMP = '/var/run/hw-management/thermal/fan_amb'

    # Ambient temperature limits.
    AMBIENT_TEMP_CRITICAL_THRESHOLD = '/var/run/hw-management/config/amb_tmp_crit_limit'
    AMBIENT_TEMP_WARNING_THRESHOLD = '/var/run/hw-management/config/amb_tmp_warn_limit'

    # PSU power files; the '{}' placeholder is filled with the PSU number.
    PSU_POWER_SLOPE = '/var/run/hw-management/config/psu_power_slope'
    PSU_POWER_CAPACITY = '/var/run/hw-management/config/psu{}_power_capacity'
    PSU_POWER = '/var/run/hw-management/power/psu{}_power'

    def __init__(self, dut):
        """
        :param dut: DUT object handed over to MockerHelper.
        """
        self.mock_helper = MockerHelper(dut)

    def deinit(self):
        """
        Delegate the cleanup to the underlying MockerHelper.
        :return:
        """
        self.mock_helper.deinit()

    def _read_int(self, file_path):
        """
        Read a file through the mock helper and convert its content to int.
        :param file_path: Path of the file to read.
        :return: Content of the file as an integer.
        """
        return int(self.mock_helper.read_value(file_path))

    def mock_power_threshold(self, number_psus):
        """
        Mock the ambient limits, the power slope and the capacity of each PSU,
        then mock both ambient temperatures with their current readings.
        :param number_psus: Number of PSUs; PSUs are numbered from 1.
        :return:
        """
        helper = self.mock_helper

        # force=True: mock these files even if they do not exist yet.
        helper.mock_value(self.AMBIENT_TEMP_WARNING_THRESHOLD, 65000, force=True)
        helper.mock_value(self.AMBIENT_TEMP_CRITICAL_THRESHOLD, 75000, force=True)
        helper.mock_value(self.PSU_POWER_SLOPE, 2000, force=True)

        capacity = None
        for psu in range(1, number_psus + 1):
            # The capacity is derived from the current PSU only while no
            # non-zero capacity has been computed yet; afterwards it is reused.
            if not capacity:
                measured = int(helper.read_value(self.PSU_POWER.format(psu)))
                # Round to the nearest multiple of 100000000 (100 watt according
                # to the original comment, so presumably microwatts -- confirm)
                # and then double it to avoid noise when the power fluctuates.
                capacity = int(round(measured / 100000000.0)) * 100000000 * 2
            helper.mock_value(self.PSU_POWER_CAPACITY.format(psu), capacity, force=True)

        # Also mock ambient temperatures, each with the value it currently has.
        port_temperature = self.read_port_ambient_thermal()
        helper.mock_value(self.PORT_AMBIENT_TEMP, port_temperature)
        fan_temperature = self.read_fan_ambient_thermal()
        helper.mock_value(self.FAN_AMBIENT_TEMP, fan_temperature)

    def mock_psu_power(self, psu, power):
        """
        :param psu: PSU number used to build the power file path.
        :param power: Power value to write; converted to int first.
        :return:
        """
        power_file = self.PSU_POWER.format(psu)
        self.mock_helper.mock_value(power_file, int(power))

    def mock_fan_ambient_thermal(self, temperature):
        """
        :param temperature: Fan ambient temperature to write; converted to int first.
        :return:
        """
        self.mock_helper.mock_value(self.FAN_AMBIENT_TEMP, int(temperature))

    def mock_port_ambient_thermal(self, temperature):
        """
        :param temperature: Port ambient temperature to write; converted to int first.
        :return:
        """
        self.mock_helper.mock_value(self.PORT_AMBIENT_TEMP, int(temperature))

    def read_psu_power_threshold(self, psu):
        """
        :param psu: PSU number used to build the capacity file path.
        :return: Content of the PSU power capacity file as an integer.
        """
        return self._read_int(self.PSU_POWER_CAPACITY.format(psu))

    def read_psu_power_slope(self):
        """
        :return: Content of the PSU power slope file as an integer.
        """
        return self._read_int(self.PSU_POWER_SLOPE)

    def read_psu_power(self, psu):
        """
        :param psu: PSU number used to build the power file path.
        :return: Content of the PSU power file as an integer.
        """
        return self._read_int(self.PSU_POWER.format(psu))

    def read_ambient_temp_critical_threshold(self):
        """
        :return: Content of the ambient critical limit file as an integer.
        """
        return self._read_int(self.AMBIENT_TEMP_CRITICAL_THRESHOLD)

    def read_ambient_temp_warning_threshold(self):
        """
        :return: Content of the ambient warning limit file as an integer.
        """
        return self._read_int(self.AMBIENT_TEMP_WARNING_THRESHOLD)

    def read_port_ambient_thermal(self):
        """
        :return: Content of the port ambient temperature file as an integer.
        """
        return self._read_int(self.PORT_AMBIENT_TEMP)

    def read_fan_ambient_thermal(self):
        """
        :return: Content of the fan ambient temperature file as an integer.
        """
        return self._read_int(self.FAN_AMBIENT_TEMP)
Loading