Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pcieutil] Add 'pcie-aer' sub-command to display AER stats #1169

Merged
merged 9 commits into from
Jan 26, 2021
167 changes: 167 additions & 0 deletions pcieutil/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@

try:
import os
import re
import sys
from collections import OrderedDict

import click
from sonic_py_common import device_info, logger
from swsssdk import SonicV2Connector
from tabulate import tabulate
import utilities_common.cli as clicommon
except ImportError as e:
raise ImportError("%s - required module not found" % str(e))

Expand Down Expand Up @@ -105,6 +110,168 @@ def show():
click.echo("bus:dev.fn %s:%s.%s - dev_id=0x%s, %s" % (Bus, Dev, Fn, Id, Name))


# PCIe AER stats helpers

# Counter names shared by both uncorrectable severities ("fatal" and
# "non_fatal"); each severity appends its own total counter below.
_aer_uncorrectable_fields = (
    'Undefined', 'DLP', 'SDES', 'TLP', 'FCP', 'CmpltTO', 'CmpltAbrt',
    'UnxCmplt', 'RxOF', 'MalfTLP', 'ECRC', 'UnsupReq', 'ACSViol',
    'UncorrIntErr', 'BlockedTLP', 'AtomicOpBlocked', 'TLPBlockedErr',
)

# Maps an AER severity name to the ordered list of counter names
# displayed for that severity.
aer_fields = {
    "correctable": [
        'RxErr', 'BadTLP', 'BadDLLP', 'Rollover', 'Timeout',
        'NonFatalErr', 'CorrIntErr', 'HeaderOF', 'TOTAL_ERR_COR',
    ],
    "fatal": list(_aer_uncorrectable_fields) + ['TOTAL_ERR_FATAL'],
    "non_fatal": list(_aer_uncorrectable_fields) + ['TOTAL_ERR_NONFATAL'],
}


class PcieDevice(click.ParamType):
    '''Click parameter type for a PCIe device address given as
    <Bus>:<Dev>.<Fn>, where every component is hexadecimal.'''
    name = "<Bus>:<Dev>.<Fn>"

    def convert(self, value, param, ctx):
        '''Validate a <Bus>:<Dev>.<Fn> string and return it normalised.

        Bus and Dev are one or two hex digits, Fn is a single hex digit.
        Bus must not exceed 255, Dev must not exceed 31 and Fn must not
        exceed 7. Any violation is reported through self.fail().

        Returns the address formatted as "%02x:%02x.%d", i.e. Bus and Dev
        zero-padded to two lower-case hex digits.
        '''
        # \Z anchors the pattern at the end of the string so that input with
        # trailing characters (e.g. "00:01.0xyz") is rejected instead of
        # being silently truncated to its valid-looking prefix.
        match = re.match(r'([0-9A-Fa-f]{1,2}):([0-9A-Fa-f]{1,2})\.([0-9A-Fa-f])\Z', value)

        if not match:
            self.fail('{} is not in <Bus>:<Dev>.<Fn> format'.format(value), param, ctx)

        Bus, Dev, Fn = [int(val, 16) for val in match.groups()]
        if Bus > 255:
            self.fail('Invalid Bus number', param, ctx)

        if Dev > 31:
            self.fail('Invalid Dev number', param, ctx)

        if Fn > 7:
            self.fail('Invalid Fn number', param, ctx)

        return "%02x:%02x.%d" % (Bus, Dev, Fn)


# Options shared by every PCIe AER sub-command.
_pcie_aer_click_options = [
    # Restrict the output to a single device; the value is exposed to the
    # command under the parameter name 'device_key'.
    click.Option(
        param_decls=['-d', '--device', 'device_key'],
        type=PcieDevice(),
        help="Display stats only for the specified device",
    ),
    # Flag that switches from the default output to displaying all stats.
    click.Option(
        param_decls=['-v', '--verbose'],
        is_flag=True,
        help="Display all stats",
    ),
]


class PcieAerCommand(click.Command):
    '''This subclass of click.Command provides common options, help
    and short help text for PCIe AER commands'''

    def __init__(self, *args, **kwargs):
        super(PcieAerCommand, self).__init__(*args, **kwargs)
        # Give each command its own list so that changing one command's
        # params cannot leak into the shared module-level list or into
        # sibling commands (the Option objects themselves are still shared).
        self.params = list(_pcie_aer_click_options)

    def _summary(self):
        '''Return the one-line description built from the command name,
        with underscores shown as dashes (e.g. non_fatal -> non-fatal).'''
        return "Show {} PCIe AER attributes".format(self.name.replace("_", "-"))

    def format_help_text(self, ctx, formatter):
        '''Write the summary line and the default-behaviour note, indented,
        into the help formatter.'''
        formatter.write_paragraph()
        with formatter.indentation():
            formatter.write_text(self._summary())
            formatter.write_text("(Default: Display only non-zero attributes)")

    def get_short_help_str(self, limit=45):
        '''Return the summary line used in command listings.

        `limit` is accepted for compatibility with click.Command's signature
        (which defaults it to 45) but is not applied: the summary is returned
        in full.
        '''
        return self._summary()


def pcie_aer_display(ctx, severity):
    '''Print a grid table of the PCIe AER counters of one severity.

    The counters are read from the "PCIE_DEVICE|*" entries of STATE_DB.
    The table has one row per counter listed in aer_fields[severity] and
    one column per displayed device (headed by the device address and id).

    ctx      -- click context. ctx.params['device_key'] optionally restricts
                the output to one device; ctx.params['verbose'] enables the
                display of all-zero devices and counters. ctx.obj is used as
                a "a table was already printed" flag: when set, a blank line
                is printed before the table, and it is set after printing.
    severity -- key of aer_fields ("correctable", "fatal" or "non_fatal").

    Unless verbose is set, devices whose counters of this severity are all
    '0' are skipped, counters that are '0' for every displayed device are
    dropped, and nothing is printed when no device remains. A counter that
    is missing from the DB entry is shown as 'NA'.
    '''
    device_key = ctx.params['device_key']
    no_zero = not ctx.params['verbose']
    header = ["AER - " + severity.upper().replace("_", "")]
    fields = aer_fields[severity]
    # DB attribute names have the form "<severity>|<field>"; matching on the
    # separator too keeps the check limited to exactly this severity.
    prefix = severity + "|"
    dev_found = False

    statedb = SonicV2Connector()
    statedb.connect(statedb.STATE_DB)

    # Every row starts with the field name; one value per device is appended.
    table = OrderedDict((field, [field]) for field in fields)

    if device_key:
        pcie_dev_list = ["PCIE_DEVICE|%s" % device_key]
    else:
        # keys() may return nothing; treat that as an empty device list.
        pcie_dev_list = sorted(statedb.keys(statedb.STATE_DB, "PCIE_DEVICE|*") or [])

    for pcie_dev_key in pcie_dev_list:
        aer_attribute = statedb.get_all(statedb.STATE_DB, pcie_dev_key)
        if not aer_attribute:
            continue

        if device_key:
            dev_found = True

        if no_zero and all(val == '0' for key, val in aer_attribute.items() if key.startswith(prefix)):
            continue

        pcie_dev = pcie_dev_key.split("|")[1]
        # Fall back to 'NA' like the counters below instead of raising
        # KeyError when the DB entry carries no 'id' attribute.
        Id = aer_attribute.get('id', 'NA')

        # Tabulate Header
        header.append("%s\n%s" % (pcie_dev, Id))

        # Tabulate Row
        for field in fields:
            table[field].append(aer_attribute.get(prefix + field, 'NA'))

    if device_key and not dev_found:
        # NOTE(review): click documents Context.exit() as taking an exit
        # code; a message string is passed here, presumably relying on it
        # being printed when the process exits. Kept unchanged - confirm
        # before altering.
        ctx.exit("Device not found in DB")

    # Strip fields with no non-zero value
    if no_zero:
        for field in fields:
            if all(val == '0' for val in table[field][1:]):
                del table[field]

    # In the default (non-verbose) mode print nothing when no device
    # contributed a column.
    if not (no_zero and (len(header) == 1)):
        if ctx.obj:
            click.echo("")

        click.echo(tabulate(list(table.values()), header, tablefmt="grid"))
        ctx.obj = True


# Show PCIe AER status
@cli.group(cls=clicommon.AliasedGroup)
@click.pass_context
def pcie_aer(ctx):
    """Display PCIe AER status"""
    # ctx.obj acts as a "table already printed" flag. It starts out False;
    # pcie_aer_display() sets it to True after printing a table and prints a
    # blank separator line first whenever it is already True, which separates
    # the per-severity tables of the 'all' sub-command.
    ctx.obj = False


@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def correctable(ctx, device_key, verbose):
    """Show correctable PCIe AER attributes"""
    # device_key and verbose are not used directly here: the display helper
    # reads them back from ctx.params.
    pcie_aer_display(ctx, severity="correctable")


@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def fatal(ctx, device_key, verbose):
    """Show fatal PCIe AER attributes"""
    # device_key and verbose are not used directly here: the display helper
    # reads them back from ctx.params.
    pcie_aer_display(ctx, severity="fatal")


@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def non_fatal(ctx, device_key, verbose):
    """Show non-fatal PCIe AER attributes"""
    # device_key and verbose are not used directly here: the display helper
    # reads them back from ctx.params.
    pcie_aer_display(ctx, severity="non_fatal")


@pcie_aer.command(name='all', cls=PcieAerCommand)
@click.pass_context
def all_errors(ctx, device_key, verbose):
    """Show all PCIe AER attributes"""
    # One table per severity, always in this order. device_key and verbose
    # are read back from ctx.params by the display helper.
    for severity in ("correctable", "fatal", "non_fatal"):
        pcie_aer_display(ctx, severity)


# Show PCIe Vendor ID and Device ID
@cli.command()
def check():
Expand Down
98 changes: 98 additions & 0 deletions tests/mock_tables/state_db.json
Original file line number Diff line number Diff line change
Expand Up @@ -389,5 +389,103 @@
"CHASSIS_MIDPLANE_TABLE|LINE-CARD1": {
"ip_address": "192.168.1.2",
"access": "False"
},
"PCIE_DEVICE|00:01.0": {
"correctable|BadDLLP": "0",
"correctable|BadTLP": "0",
"correctable|BadTLP": "1",
"correctable|CorrIntErr": "0",
"correctable|HeaderOF": "0",
"correctable|NonFatalErr": "0",
"correctable|Rollover": "0",
"correctable|RxErr": "0",
"correctable|TOTAL_ERR_COR": "0",
"correctable|TOTAL_ERR_COR": "1",
"correctable|Timeout": "0",
"fatal|ACSViol": "0",
"fatal|AtomicOpBlocked": "0",
"fatal|BlockedTLP": "0",
"fatal|CmpltAbrt": "0",
"fatal|CmpltTO": "0",
"fatal|DLP": "0",
"fatal|ECRC": "0",
"fatal|FCP": "0",
"fatal|MalfTLP": "0",
"fatal|RxOF": "0",
"fatal|SDES": "0",
"fatal|TLP": "0",
"fatal|TLPBlockedErr": "0",
"fatal|TOTAL_ERR_FATAL": "0",
"fatal|UncorrIntErr": "0",
"fatal|Undefined": "0",
"fatal|UnsupReq": "0",
"fatal|UnxCmplt": "0",
"id": "0x0001",
"non_fatal|ACSViol": "0",
"non_fatal|AtomicOpBlocked": "0",
"non_fatal|BlockedTLP": "0",
"non_fatal|CmpltAbrt": "0",
"non_fatal|CmpltTO": "0",
"non_fatal|DLP": "0",
"non_fatal|ECRC": "0",
"non_fatal|FCP": "0",
"non_fatal|MalfTLP": "1",
"non_fatal|RxOF": "0",
"non_fatal|SDES": "0",
"non_fatal|TLP": "0",
"non_fatal|TLPBlockedErr": "0",
"non_fatal|TOTAL_ERR_NONFATAL": "1",
"non_fatal|UncorrIntErr": "0",
"non_fatal|Undefined": "0",
"non_fatal|UnsupReq": "0",
"non_fatal|UnxCmplt": "0"
},
"PCIE_DEVICE|01:00.0": {
"correctable|BadDLLP": "0",
"correctable|BadTLP": "0",
"correctable|CorrIntErr": "0",
"correctable|HeaderOF": "0",
"correctable|NonFatalErr": "0",
"correctable|Rollover": "0",
"correctable|RxErr": "1",
"correctable|TOTAL_ERR_COR": "1",
"correctable|Timeout": "0",
"fatal|ACSViol": "0",
"fatal|AtomicOpBlocked": "0",
"fatal|BlockedTLP": "0",
"fatal|CmpltAbrt": "0",
"fatal|CmpltTO": "0",
"fatal|DLP": "0",
"fatal|ECRC": "0",
"fatal|FCP": "0",
"fatal|MalfTLP": "0",
"fatal|RxOF": "0",
"fatal|SDES": "0",
"fatal|TLP": "0",
"fatal|TLPBlockedErr": "0",
"fatal|TOTAL_ERR_FATAL": "0",
"fatal|UncorrIntErr": "0",
"fatal|Undefined": "0",
"fatal|UnsupReq": "0",
"fatal|UnxCmplt": "0",
"id": "0x0002",
"non_fatal|ACSViol": "0",
"non_fatal|AtomicOpBlocked": "0",
"non_fatal|BlockedTLP": "0",
"non_fatal|CmpltAbrt": "0",
"non_fatal|CmpltTO": "0",
"non_fatal|DLP": "0",
"non_fatal|ECRC": "0",
"non_fatal|FCP": "0",
"non_fatal|MalfTLP": "0",
"non_fatal|RxOF": "0",
"non_fatal|SDES": "0",
"non_fatal|TLP": "0",
"non_fatal|TLPBlockedErr": "0",
"non_fatal|TOTAL_ERR_NONFATAL": "0",
"non_fatal|UncorrIntErr": "0",
"non_fatal|Undefined": "0",
"non_fatal|UnsupReq": "0",
"non_fatal|UnxCmplt": "0"
}
}
Loading