Skip to content

Commit

Permalink
Add thermal control support for SONiC (#3949)
Browse files Browse the repository at this point in the history
  • Loading branch information
Junchao-Mellanox authored and abdosi committed May 1, 2020
1 parent f762c77 commit 109a13c
Show file tree
Hide file tree
Showing 36 changed files with 1,019 additions and 13 deletions.
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json
72 changes: 72 additions & 0 deletions device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "60"
}
]
}
]
}
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json
11 changes: 11 additions & 0 deletions dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,14 @@ stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}

{% if not skip_thermalctld %}
[program:thermalctld]
command=/usr/bin/thermalctld
priority=9
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}
4 changes: 4 additions & 0 deletions dockers/docker-platform-monitor/start.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,7 @@ supervisorctl start psud
supervisorctl start syseepromd
{% endif %}

{% if not skip_thermalctld %}
supervisorctl start thermalctld
{% endif %}

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
From 76b02916794be2e2558fcff1d11609a594f633d7 Mon Sep 17 00:00:00 2001
From: Stephen Sun <stephens@mellanox.com>
Date: Fri, 14 Feb 2020 13:48:00 +0800
Subject: [PATCH] Disable thermal policy running in hw-mgmt service, because
the SONiC thermal control algorithm is now supported.

Signed-off-by: Stephen Sun <stephens@mellanox.com>
---
usr/usr/bin/hw-management.sh | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 2cdbfb2..48b41d5 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -799,7 +799,11 @@ do_start()
#disabled for leopard chipless bringup.
echo 1 > $config_path/suspend

- $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
+#
+# Disable thermal control algorithm in hw-management service
+# because SONiC already provides its own thermal control algorithm
+#
+# $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
}

do_stop()
--
1.9.1

1 change: 1 addition & 0 deletions platform/mellanox/mlnx-platform-api.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl
$(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api
$(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2
$(SONIC_PLATFORM_API_PY2)_DEPENDS = $(SONIC_PLATFORM_COMMON_PY2) $(SONIC_DAEMON_BASE_PY2) $(SONIC_CONFIG_ENGINE)
SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)

export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))"
2 changes: 2 additions & 0 deletions platform/mellanox/mlnx-platform-api/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
.cache/
3 changes: 3 additions & 0 deletions platform/mellanox/mlnx-platform-api/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning
2 changes: 2 additions & 0 deletions platform/mellanox/mlnx-platform-api/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[aliases]
test=pytest
9 changes: 9 additions & 0 deletions platform/mellanox/mlnx-platform-api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
maintainer_email='kevinw@mellanox.com',
packages=[
'sonic_platform',
'tests'
],
setup_requires= [
'pytest-runner'
],
tests_require = [
'pytest',
'mock>=2.0.0'
],
classifiers=[
'Development Status :: 3 - Alpha',
Expand All @@ -26,5 +34,6 @@
'Topic :: Utilities',
],
keywords='sonic SONiC platform PLATFORM',
test_suite='setup.get_test_suite'
)

Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__all__ = ["platform", "chassis"]
from sonic_platform import *
from sonic_platform import *
5 changes: 5 additions & 0 deletions platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,3 +442,8 @@ def get_change_event(self, timeout=0):
return True, {'sfp':port_dict}
else:
return True, {'sfp':{}}

def get_thermal_manager(self):
    """
    Retrieves the thermal manager class of this chassis

    Returns:
        The ThermalManager class object itself (not an instance of it),
        taken from the sibling thermal_manager module
    """
    # The import is performed at call time, inside the method body,
    # rather than when this module is loaded.
    from .thermal_manager import ThermalManager as manager_cls
    return manager_cls

18 changes: 13 additions & 5 deletions platform/mellanox/mlnx-platform-api/sonic_platform/fan.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,22 @@ def __init__(self, fan_index, drawer_index = 1, psu_fan = False):
self.fan_speed_set_path = "fan{}_speed_set".format(self.index)
self.fan_presence_path = "fan{}_status".format(self.drawer_index)
self.fan_max_speed_path = "fan{}_max".format(self.index)
self._name = "fan{}".format(fan_index + 1)
else:
self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_max_speed_path = "psu{}_max".format(self.index)
self._name = 'psu_{}_fan_{}'.format(self.index, fan_index)
self.fan_max_speed_path = None
self.fan_status_path = "fan{}_fault".format(self.index)
self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
self.fan_orange_led_path = "led_fan{}_orange".format(self.drawer_index)
self.fan_pwm_path = "pwm1"
self.fan_led_cap_path = "led_fan{}_capability".format(self.drawer_index)

def get_name(self):
    """
    Retrieves the name of the fan

    Returns:
        The value held in self._name
    """
    fan_name = self._name
    return fan_name

def get_status(self):
"""
Retrieves the operational status of fan
Expand Down Expand Up @@ -123,7 +128,11 @@ def get_speed(self):
speed_in_rpm = int(fan_curr_speed.read())
except (ValueError, IOError):
speed_in_rpm = 0


if self.fan_max_speed_path is None:
# Max speed is not available for this fan, so return the raw speed in RPM instead of a percentage.
return speed_in_rpm

max_speed_in_rpm = self._get_max_speed_in_rpm()
speed = 100*speed_in_rpm/max_speed_in_rpm

Expand All @@ -136,11 +145,10 @@ def get_target_speed(self):
Returns:
int: percentage of the max fan speed
"""
speed = 0

if self.is_psu_fan:
# Unlike a system fan, the PSU fan speed cannot be modified, so target speed is N/A
return speed
return self.get_speed()

try:
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm:
pwm = int(fan_pwm.read())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self):
self._chassis.initialize_psu()
self._chassis.initialize_fan()
self._chassis.initialize_eeprom()
self._chassis.initialize_thermals()

def _is_host(self):
"""
Expand Down
6 changes: 5 additions & 1 deletion platform/mellanox/mlnx-platform-api/sonic_platform/psu.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __init__(self, psu_index, sku):
psu_oper_status = "thermal/psu{}_pwr_status".format(self.index)
#psu_oper_status should always be present for all SKUs
self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status)
self._name = "PSU{}".format(psu_index + 1)

if sku in hwsku_dict_psu:
filemap = psu_profile_list[hwsku_dict_psu[sku]]
Expand Down Expand Up @@ -92,7 +93,10 @@ def __init__(self, psu_index, sku):

fan = Fan(psu_index, psu_index, True)
if fan.get_presence():
self._fan = fan
self._fan_list.append(fan)

def get_name(self):
    """
    Retrieves the name of the PSU

    Returns:
        The value held in self._name
    """
    psu_name = self._name
    return psu_name

def _read_generic_file(self, filename, len):
"""
Expand Down
Loading

0 comments on commit 109a13c

Please sign in to comment.