Skip to content

Commit

Permalink
Revert the special handling of RJ45 ports (#56)
Browse files Browse the repository at this point in the history
* Revert the special handling of RJ45 ports

sfp.py
sfp_event.py
chassis.py

Signed-off-by: Stephen Sun <stephens@nvidia.com>

* Remove deadcode

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs authored and keboliu committed Jun 6, 2022
1 parent 356a120 commit 5c82369
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 164 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def get_change_event(self, timeout=0):
# Initialize SFP event first
if not self.sfp_event:
from .sfp_event import sfp_event
self.sfp_event = sfp_event(self.RJ45_port_list)
self.sfp_event = sfp_event()
self.sfp_event.initialize()

wait_for_ever = (timeout == 0)
Expand Down
109 changes: 77 additions & 32 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,44 @@ def __exit__(self, exc_type, exc_val, exc_tb):
deinitialize_sdk_handle(self.sdk_handle)


class SFP(SfpOptoeBase):
class NvidiaSFPCommon(SfpOptoeBase):
def __init__(self, sfp_index):
super(NvidiaSFPCommon, self).__init__()
self.index = sfp_index + 1
self.sdk_index = sfp_index

@property
def sdk_handle(self):
if not SFP.shared_sdk_handle:
SFP.shared_sdk_handle = initialize_sdk_handle()
if not SFP.shared_sdk_handle:
logger.log_error('Failed to open SDK handle')
return SFP.shared_sdk_handle

@classmethod
def _get_module_info(self, sdk_handle, sdk_index):
"""
Get error code of the SFP module
Returns:
The error code fetch from SDK API
"""
module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)

module_id_info = sx_mgmt_module_id_info_t()
module_id_info.slot_id = 0
module_id_info.module_id = sdk_index
sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

rc = sx_mgmt_phy_module_info_get(sdk_handle, module_id_info_list, 1, module_info_list)
assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc)

mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
return mod_info.module_state.oper_state, mod_info.module_state.error_type


class SFP(NvidiaSFPCommon):
"""Platform-specific SFP class"""
shared_sdk_handle = None
SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE = 'Long range for non-Mellanox cable or module'
Expand All @@ -253,13 +290,10 @@ class SFP(SfpOptoeBase):
SFP_MLNX_ERROR_BIT_RESERVED = 0x80000000

def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, lc_name=None):
super(SFP, self).__init__()
super(SFP, self).__init__(sfp_index)
self._sfp_type = sfp_type

if slot_id == 0: # For non-modular chassis
self.index = sfp_index + 1
self.sdk_index = sfp_index

from .thermal import initialize_sfp_thermal
self._thermal_list = initialize_sfp_thermal(sfp_index)
else: # For modular chassis
Expand All @@ -284,13 +318,15 @@ def get_mst_pci_device(self):
logger.log_error("Failed to find mst PCI device rc={} err.msg={}".format(e.returncode, e.output))
return device_name

'''
@property
def sdk_handle(self):
if not SFP.shared_sdk_handle:
SFP.shared_sdk_handle = initialize_sdk_handle()
if not SFP.shared_sdk_handle:
logger.log_error('Failed to open SDK handle')
return SFP.shared_sdk_handle
'''

def reinit(self):
"""
Expand Down Expand Up @@ -673,27 +709,6 @@ def is_replaceable(self):
"""
return True

def _get_error_code(self):
"""
Get error code of the SFP module
Returns:
The error code fetch from SDK API
"""
module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)

module_id_info = sx_mgmt_module_id_info_t()
module_id_info.slot_id = 0
module_id_info.module_id = self.sdk_index
sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

rc = sx_mgmt_phy_module_info_get(self.sdk_handle, module_id_info_list, 1, module_info_list)
assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc)

mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
return mod_info.module_state.oper_state, mod_info.module_state.error_type

@classmethod
def _get_error_description_dict(cls):
return {0: cls.SFP_ERROR_DESCRIPTION_POWER_BUDGET_EXCEEDED,
Expand All @@ -714,12 +729,12 @@ def get_error_description(self):
Get error description
Args:
error_code: The error code returned by _get_error_code
error_code: The error code returned by _get_module_info
Returns:
The error description
"""
oper_status, error_code = self._get_error_code()
oper_status, error_code = self._get_module_info(self.sdk_handle, self.sdk_index)
if oper_status == SX_PORT_MODULE_STATUS_INITIALIZING:
error_description = self.SFP_STATUS_INITIALIZING
elif oper_status == SX_PORT_MODULE_STATUS_PLUGGED:
Expand All @@ -739,13 +754,28 @@ def get_error_description(self):
return error_description


class RJ45Port(SfpOptoeBase):
class RJ45Port(NvidiaSFPCommon):
"""class derived from SFP, representing RJ45 ports"""

def __init__(self, sfp_index):
super(RJ45Port, self).__init__()
super(RJ45Port, self).__init__(sfp_index)
self.sfp_type = RJ45_TYPE

@classmethod
def _get_presence(cls, sdk_handle, sdk_index):
"""Class level method to get low power mode.
Args:
sdk_handle: SDK handle
sdk_index (integer): SDK port index
slot_id (integer): Slot ID
Returns:
[boolean]: True if low power mode is on else off
"""
oper_status, _ = cls._get_module_info(sdk_handle, sdk_index)
return print(oper_status == SX_PORT_MODULE_STATUS_PLUGGED)

def get_presence(self):
"""
Retrieves the presence of the device
Expand All @@ -754,7 +784,22 @@ def get_presence(self):
Returns:
bool: True if device is present, False if not
"""
return True
if utils.is_host():
# To avoid performance issue,
# call class level method to avoid initialize the whole sonic platform API
get_presence_code = 'from sonic_platform import sfp;\n' \
'with sfp.SdkHandleContext() as sdk_handle:' \
'print(sfp.RJ45Port._get_presence(sdk_handle, {}))'.format(self.sdk_index)
presence_cmd = "docker exec pmon python3 -c \"{}\"".format(get_presence_code)
try:
output = subprocess.check_output(presence_cmd, shell=True, universal_newlines=True)
return 'True' in output
except subprocess.CalledProcessError as e:
print("Error! Unable to get presence for {}, rc = {}, err msg: {}".format(self.sdk_index, e.returncode, e.output))
return False
else:
oper_status, _ = self._get_module_info(self.sdk_handle, self.sdk_index);
return (oper_status == SX_PORT_MODULE_STATUS_PLUGGED)

def get_transceiver_info(self):
"""
Expand Down Expand Up @@ -841,7 +886,7 @@ def get_error_description(self):
Get error description
Args:
error_code: The error code returned by _get_error_code
error_code: Always false on SN2201
Returns:
The error description
Expand Down
88 changes: 2 additions & 86 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,6 @@ class MockSxFd(object):
SDK_SFP_STATE_DIS: str(SFP.SFP_STATUS_BIT_REMOVED),
}

"""
RJ45 ports events definition
For RJ45, get_present always returns True.
In case an unplug / unknown event is reported for a port, error status is leveraged for representing them.
The following events will be used.
- Unknown: 2147483648 => 0x80000000
- Unplug: 1073741824 => 0x40000000
According to the error status design, the upper half (bits 16 ~ 31) are the events are encoded from bit 16 (0x0001000),
using those numbers will avoid conflict as much as possible
"""
RJ45_UNPLUG_EVENT = '1073741824'
RJ45_UNKNOWN_EVENT = '2147483648'

# system level event/error
EVENT_ON_ALL_SFP = '-1'
SYSTEM_NOT_READY = 'system_not_ready'
Expand All @@ -147,17 +134,14 @@ class sfp_event:
SX_OPEN_TIMEOUT = 5
SELECT_TIMEOUT = 1

def __init__(self, rj45_port_list=None):
def __init__(self):
self.swid = 0
self.handle = None

# Allocate SDK fd and user channel structures
self.rx_fd_p = new_sx_fd_t_p()
self.user_channel_p = new_sx_user_channel_t_p()

self.RJ45_port_list = rj45_port_list
self.absent_ports_before_init = []

def initialize(self):
swid_cnt_p = None

Expand Down Expand Up @@ -221,30 +205,6 @@ def initialize(self):

if rc != SX_STATUS_SUCCESS:
raise RuntimeError("sx_api_host_ifc_trap_id_register_set failed with rc {}, exiting...".format(rc))

if self.RJ45_port_list:
# Fetch the present state of each RJ45 port.
module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1)
module_info_list = new_sx_mgmt_phy_module_info_t_arr(1)
absent_port_list = []

for i in self.RJ45_port_list:
module_id_info = sx_mgmt_module_id_info_t()
module_id_info.slot_id = 0
module_id_info.module_id = i
sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info)

rc = sx_mgmt_phy_module_info_get(self.handle, module_id_info_list, 1, module_info_list)
assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc)

mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0)
if mod_info.module_state.oper_state not in [SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED]:
absent_port_list.append(i)

self.absent_ports_before_init = absent_port_list

delete_sx_mgmt_module_id_info_t_arr(module_id_info_list)
delete_sx_mgmt_phy_module_info_t_arr(module_info_list)
except Exception as e:
logger.log_error("sfp_event initialization failed due to {}, exiting...".format(repr(e)))
if swid_cnt_p is not None:
Expand Down Expand Up @@ -294,15 +254,6 @@ def check_sfp_status(self, port_change, error_dict, timeout):
to repeat calling it with timeout = 0 in a loop until no new notification read (in this case it returns false).
by doing so all the notifications in the fd can be retrieved through a single call to get_change_event.
"""
if self.absent_ports_before_init:
# This is the first time this method is called.
# We should return the ports that are not present during initialization
for i in self.absent_ports_before_init:
error_dict[i + 1] = 'Not present'
port_change[i + 1] = RJ45_UNPLUG_EVENT
self.absent_ports_before_init = []
return True

found = 0

try:
Expand Down Expand Up @@ -333,16 +284,7 @@ def check_sfp_status(self, port_change, error_dict, timeout):
# 3. and then the sfp module is removed
# 4. sfp_event starts to try fetching the change event
# in this case found is increased so that True will be returned
# For RJ45 ports, we will report "unknown" event anyway since it's legal
reported = 0
if self.RJ45_port_list:
for port in port_list:
if port in self.RJ45_port_list:
port_change[port+1] = sfp_state
reported += 1

if not reported:
logger.log_info("unknown module state {}, maybe the port suffers two adjacent insertion/removal".format(module_state))
logger.log_info("unknown module state {}, maybe the port suffers two adjacent insertion/removal".format(module_state))
found += 1
continue

Expand Down Expand Up @@ -373,32 +315,6 @@ def check_sfp_status(self, port_change, error_dict, timeout):
error_dict[port+1] = error_description
found += 1

if self.RJ45_port_list:
# Translate any plugin/plugout event into error dict for RJ45 ports
unknown_port = set()
unplug_port = set()
for index, event in port_change.items():
if index in self.RJ45_port_list:
# Remove it from port event
# check if it's unknown event
if event == '0': # Remove event
unplug_port.add(index)
elif event != '1':
unknown_port.add(index)
# This is to leverage TRANSCEIVER_STATUS table to represent 'Not present' and 'Unknown' state
# The event should satisfies:
# - Vendor specific error bit
# - Non-blocking bit
# Currently, the 2 MSBs are reserved for this since they are most unlikely to be used in the near future
for index in unknown_port:
# Bit 31 for unknown
port_change[index] = RJ45_UNKNOWN_EVENT
error_dict[index] = 'Unknown'
for index in unplug_port:
# Bit 30 for not present
port_change[index] = RJ45_UNPLUG_EVENT
error_dict[index] = 'Not present'

return found != 0

def on_pmpe(self, fd_p):
Expand Down
3 changes: 2 additions & 1 deletion platform/mellanox/mlnx-platform-api/tests/test_sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def test_sfp_index(self, mock_max_port):
assert sfp.index == 5

@mock.patch('sonic_platform.sfp.SFP.read_eeprom', mock.MagicMock(return_value=None))
@mock.patch('sonic_platform.sfp.SFP._get_error_code')
@mock.patch('sonic_platform.sfp.SFP.shared_sdk_handle', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.sfp.SFP._get_module_info')
@mock.patch('sonic_platform.chassis.Chassis.get_num_sfps', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.chassis.extract_RJ45_ports_index', mock.MagicMock(return_value=[]))
def test_sfp_get_error_status(self, mock_get_error_code):
Expand Down
45 changes: 1 addition & 44 deletions platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setup_class(cls):
os.environ["MLNX_PLATFORM_API_UNIT_TESTING"] = "1"

@patch('select.select', MagicMock(return_value=([99], None, None)))
def test_check_sfp_status_xsfp(self):
def test_check_sfp_status(self):
from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT, SDK_SFP_STATE_ERR
from sonic_platform.sfp_event import SDK_ERRORS_TO_ERROR_BITS, SDK_ERRORS_TO_DESCRIPTION, SDK_SFP_BLOCKING_ERRORS

Expand Down Expand Up @@ -59,46 +59,3 @@ def executor(self, mock_module_state, mock_error_type, expect_status, descriptio
if description:
assert 1 in error_dict and error_dict[1] == description
assert 2 in error_dict and error_dict[2] == description

@patch('select.select', MagicMock(return_value=([99], None, None)))
def test_check_sfp_status_rj45(self):
from sonic_platform.sfp_event import sfp_event
from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT
from sonic_platform.sfp_event import RJ45_UNPLUG_EVENT, RJ45_UNKNOWN_EVENT

# Verify absent ports before initialization
event = sfp_event([0, 1, 2, 3])
event.absent_ports_before_init = [0,1]
port_change = {}
error_dict = {}
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {1: RJ45_UNPLUG_EVENT, 2: RJ45_UNPLUG_EVENT}
assert error_dict == {1: 'Not present', 2: 'Not present'}

# Unplug event
event.on_pmpe = MagicMock(return_value=(True, [2], SDK_SFP_STATE_OUT, None))
port_change.clear()
error_dict.clear()
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {3: RJ45_UNPLUG_EVENT}
assert error_dict == {3: 'Not present'}

# Plug event
event.on_pmpe = MagicMock(return_value=(True, [2], SDK_SFP_STATE_IN, None))
port_change.clear()
error_dict.clear()
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {3: str(SDK_SFP_STATE_IN)}
assert error_dict == {}

# Unknown event
event.on_pmpe = MagicMock(return_value=(True, [2], -1, None))
port_change.clear()
error_dict.clear()
found = event.check_sfp_status(port_change, error_dict, 0)
assert found
assert port_change == {3: RJ45_UNKNOWN_EVENT}
assert error_dict == {3: 'Unknown'}

0 comments on commit 5c82369

Please sign in to comment.