forked from sonic-net/sonic-buildimage
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Mellanox] Add CPU thermal control for SN4800
- Loading branch information
1 parent
cf1bc8d
commit f65991b
Showing
5 changed files
with
159 additions
and
2 deletions.
There are no files selected for viewing
46 changes: 46 additions & 0 deletions
46
platform/mellanox/mlnx-platform-api/sonic_platform/cpu_thermal_control.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from sonic_py_common.task_base import ThreadTaskBase | ||
|
||
from . import utils | ||
from .device_data import DeviceDataManager | ||
|
||
|
||
class CPUThermalControl(ThreadTaskBase):
    """Periodic task that adjusts a cooling state from the CPU temperature.

    Every ``INTERVAL`` seconds the task reads the value in ``CPU_TEMP_FILE``,
    compares it with the low/high thresholds obtained from
    ``DeviceDataManager.get_cpu_thermal_threshold()`` and writes a cooling
    state between ``MIN_COOLING_STATE`` and ``MAX_COOLING_STATE`` to
    ``CPU_COOLING_STATE``.
    """

    # File holding the cooling state that this task reads and writes.
    CPU_COOLING_STATE = '/var/run/hw-management/thermal/cooling2_cur_state'
    # File holding the CPU temperature reading.
    CPU_TEMP_FILE = '/var/run/hw-management/thermal/cpu_pack'
    # Bounds applied to every cooling state written by this task.
    MAX_COOLING_STATE = 10
    MIN_COOLING_STATE = 2
    # Timeout passed to the stop event's wait() between two runs.
    INTERVAL = 3

    def __init__(self):
        """Initialise the base task and cache the low/high thresholds."""
        super(CPUThermalControl, self).__init__()
        thresholds = DeviceDataManager.get_cpu_thermal_threshold()
        self.temp_low, self.temp_high = thresholds

    def task_worker(self):
        """Call ``run`` once per interval until the stop event is set."""
        previous_temp = 0
        while True:
            # wait() doubles as the sleep between two runs; a truthy result
            # means a stop was requested.
            if self.task_stopping_event.wait(self.INTERVAL):
                break
            previous_temp = self.run(previous_temp)

    def run(self, last_temp):
        """Perform one control step.

        Below the low threshold the minimum state is written, above the high
        threshold the maximum state is written. In between, the current
        state is moved one step up or down following the temperature trend
        relative to ``last_temp`` and clamped to the allowed range; an
        unchanged temperature writes nothing.

        :param last_temp: temperature returned by the previous step.
        :return: temperature read during this step.
        """
        temp = self.read_cpu_temp()

        if temp < self.temp_low:
            self.set_cooling_state(self.MIN_COOLING_STATE)
            return temp

        if temp > self.temp_high:
            self.set_cooling_state(self.MAX_COOLING_STATE)
            return temp

        # Within the band: follow the trend one step at a time.
        state = self.get_cooling_state()
        if temp > last_temp:
            stepped_up = min(state + 1, self.MAX_COOLING_STATE)
            self.set_cooling_state(stepped_up)
        elif temp < last_temp:
            stepped_down = max(state - 1, self.MIN_COOLING_STATE)
            self.set_cooling_state(stepped_down)
        return temp

    def set_cooling_state(self, state):
        """Write ``state`` to the cooling state file."""
        utils.write_file(self.CPU_COOLING_STATE, state, log_func=None)

    def get_cooling_state(self):
        """Read the cooling state file, defaulting to the maximum state."""
        return utils.read_int_from_file(
            self.CPU_COOLING_STATE,
            default=self.MAX_COOLING_STATE,
            log_func=None,
        )

    def read_cpu_temp(self):
        """Read the CPU temperature, defaulting to the high threshold.

        Readings above 1000 are divided by 1000 (presumably the file may
        report millidegrees -- confirm against hw-management).
        """
        raw_value = utils.read_int_from_file(
            self.CPU_TEMP_FILE,
            default=self.temp_high,
            log_func=None,
        )
        if raw_value > 1000:
            return int(raw_value / 1000)
        return raw_value
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
platform/mellanox/mlnx-platform-api/tests/test_cpu_thermal_control.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import glob | ||
import os | ||
import pytest | ||
import sys | ||
if sys.version_info.major == 3: | ||
from unittest import mock | ||
else: | ||
import mock | ||
|
||
test_path = os.path.dirname(os.path.abspath(__file__)) | ||
modules_path = os.path.dirname(test_path) | ||
sys.path.insert(0, modules_path) | ||
|
||
from sonic_platform.cpu_thermal_control import CPUThermalControl | ||
|
||
|
||
class TestCPUThermalControl:
    """Tests for ``CPUThermalControl.run`` with all file access mocked."""

    @mock.patch('sonic_platform.device_data.DeviceDataManager.get_cpu_thermal_threshold', mock.MagicMock(return_value=(85, 95)))
    @mock.patch('sonic_platform.utils.read_int_from_file')
    @mock.patch('sonic_platform.utils.write_file')
    def test_run(self, mock_write_file, mock_read_file):
        """Walk ``run`` through each threshold and trend branch."""
        controller = CPUThermalControl()

        state_path = CPUThermalControl.CPU_COOLING_STATE
        temp_path = CPUThermalControl.CPU_TEMP_FILE
        max_state = CPUThermalControl.MAX_COOLING_STATE
        min_state = CPUThermalControl.MIN_COOLING_STATE

        # Values served by the mocked reader, keyed by file path.
        fake_files = {
            state_path: 5,
            temp_path: controller.temp_high + 1,
        }
        mock_read_file.side_effect = lambda file_path, **kwargs: fake_files[file_path]

        def assert_last_write(expected_state):
            # Check the most recent write targeted the cooling state file.
            mock_write_file.assert_called_with(state_path, expected_state, log_func=None)

        # Temperature above the high threshold -> maximum state.
        controller.run(0)
        assert_last_write(max_state)

        # Temperature below the low threshold -> minimum state.
        fake_files[temp_path] = controller.temp_low - 1
        controller.run(0)
        assert_last_write(min_state)

        # Temperature inside the band and rising -> one step up from 5.
        fake_files[temp_path] = controller.temp_low
        controller.run(0)
        assert_last_write(6)

        # Temperature inside the band and falling -> one step down from 5.
        controller.run(controller.temp_low + 1)
        assert_last_write(4)

        # Rising while already at the maximum state -> stays at the maximum.
        fake_files[temp_path] = 85
        fake_files[state_path] = max_state
        controller.run(84)
        assert_last_write(max_state)

        # Falling while already at the minimum state -> stays at the minimum.
        fake_files[state_path] = min_state
        controller.run(86)
        assert_last_write(min_state)