Skip to content

Commit

Permalink
[sfputil] Expose error status fetched from STATE_DB or platform API t…
Browse files Browse the repository at this point in the history
…o CLI (sonic-net#1658)

Expose error status fetched from STATE_DB or platform API to CLI.

The commands are:
- `sfputil show error-status [-hw|--fetch-from-hardware] [-p|--port <port_name>]`
- `show interfaces transceiver error-status [-hw|--fetch-from-hardware] [<interface_name>]`

The error status is fetched from:
- `STATE_DB` by default
- hardware via the platform API if the parameter `--fetch-from-hardware` is provided.
  In this case, the CLI calls the platform API inside the pmon docker container and formats the output.

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs authored Jun 25, 2021
1 parent c5d00ae commit 38f8c06
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 1 deletion.
11 changes: 10 additions & 1 deletion doc/Command-Reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,7 @@ This command displays information for all the interfaces for the transceiver req

- Usage:
```
show interfaces transceiver (eeprom [-d|--dom] | lpmode | presence) [<interface_name>]
show interfaces transceiver (eeprom [-d|--dom] | lpmode | presence | error-status [-hw|--fetch-from-hardware]) [<interface_name>]
```

- Example (Decode and display information stored on the EEPROM of SFP transceiver connected to Ethernet0):
Expand Down Expand Up @@ -937,6 +937,15 @@ This command displays information for all the interfaces for the transceiver req
----------- ----------
Ethernet100 Present
```

- Example (Display error status of SFP transceiver connected to Ethernet100):
```
admin@sonic:~$ show interfaces transceiver error-status Ethernet100
Port Error Status
----------- --------------
Ethernet100 OK
```

Go Back To [Beginning of the document](#) or [Beginning of this section](#basic-show-commands)

## AAA & TACACS+
Expand Down
134 changes: 134 additions & 0 deletions sfputil/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@

import os
import sys
import natsort
import ast

import subprocess
import click
import sonic_platform
import sonic_platform_base.sonic_sfp.sfputilhelper
from swsscommon.swsscommon import SonicV2Connector
from natsort import natsorted
from sonic_py_common import device_info, logger, multi_asic
from tabulate import tabulate
Expand Down Expand Up @@ -615,6 +619,136 @@ def presence(port):
click.echo(tabulate(output_table, table_header, tablefmt="simple"))


# 'error-status' subcommand
def _parse_sfp_error_status(raw_output):
    """Extract the SFP error status from the text printed by the pmon command.

    Args:
        raw_output: everything the command wrote to stdout. Besides the result
                    it may contain log lines emitted while the platform API ran.
    Returns:
        A dict mapping SFP index (int) to error status (str), or None if no
        line of the output holds a result list.
    """
    for line in raw_output.splitlines():
        line = line.strip()
        # The result is a printed list whose elements follow the convention
        # '<sfp no>:<error status>'. Skip logs (and blank lines) until we find it.
        if not (line.startswith('[') and line.endswith(']')):
            continue
        try:
            entries = ast.literal_eval(line)
        except (ValueError, SyntaxError):
            # A log line that merely looks like a list
            continue
        if not isinstance(entries, list) or not all(isinstance(entry, str) for entry in entries):
            continue

        error_status = {}
        for entry in entries:
            # Split on the first ':' only: the description itself may contain ':'
            sfp_index, _, description = entry.partition(':')
            error_status[int(sfp_index)] = description
        return error_status

    return None


def fetch_error_status_from_platform_api(port):
    """Fetch the error status via platform API (executed in the pmon docker)
    Args:
        port: the port whose error status will be fetched.
              None represents all ports.
    Returns:
        A list of [port name, error status] lists, one per logical port.
    Raises:
        click.Abort: if the command run in the pmon docker fails or its output
                     contains no error status.
    """
    if port is None:
        logical_port_list = natsort.natsorted(platform_sfputil.logical)
        # Code to create a list containing the SFP objects of all ports
        generate_sfp_list_code = \
            "sfp_list = chassis.get_all_sfps()\n"
    else:
        physical_port_list = logical_port_name_to_physical_port_list(port)
        logical_port_list = [port]
        # Code to create a list containing the SFP objects of the requested port only
        generate_sfp_list_code = \
            "sfp_list = [chassis.get_sfp(x) for x in {}]\n".format(physical_port_list)

    # Code to initialize chassis object
    init_chassis_code = \
        "import sonic_platform.platform\n" \
        "platform = sonic_platform.platform.Platform()\n" \
        "chassis = platform.get_chassis()\n"

    # Code to fetch the error status
    get_error_status_code = \
        "try:\n"\
        " errors=['{}:{}'.format(sfp.index, sfp.get_error_description()) for sfp in sfp_list]\n" \
        "except NotImplementedError as e:\n"\
        " errors=['{}:{}'.format(sfp.index, 'OK (Not implemented)') for sfp in sfp_list]\n" \
        "print(errors)\n"

    # Pass the script as a single argv element so that no shell quoting is involved
    get_error_status_command = [
        "docker", "exec", "pmon", "python3", "-c",
        init_chassis_code + generate_sfp_list_code + get_error_status_code
    ]

    # Fetch error status from pmon docker
    try:
        raw_output = subprocess.check_output(get_error_status_command, universal_newlines=True)
    except subprocess.CalledProcessError as e:
        click.echo("Error! Unable to fetch error status for SFP modules. Error code = {}, error messages: {}".format(e.returncode, e.output), err=True)
        raise click.Abort()
    except OSError as e:
        # Without a shell, a missing/non-executable docker binary surfaces here
        click.echo("Error! Unable to fetch error status for SFP modules. Error messages: {}".format(e), err=True)
        raise click.Abort()

    error_status_by_index = _parse_sfp_error_status(raw_output)
    if error_status_by_index is None:
        click.echo("Error! Unable to fetch error status for SFP modules. No error status found in the output", err=True)
        raise click.Abort()

    output = []
    for logical_port_name in logical_port_list:
        physical_port_list = logical_port_name_to_physical_port_list(logical_port_name)
        port_name = get_physical_port_name(logical_port_name, 1, False)

        # The status is looked up by the first physical port of the logical port
        output.append([port_name, error_status_by_index.get(physical_port_list[0])])

    return output

def fetch_error_status_from_state_db(port, state_db):
    """Read the transceiver error status from the TRANSCEIVER_STATUS entries of STATE_DB.
    Args:
        port: the port whose error status will be fetched.
              A falsy value (e.g. None) selects every port found in the table.
        state_db: connector object providing STATE_DB, keys() and get_all().
    Returns:
        A list of [port, description] lists, naturally sorted by port name.
    """
    # Descriptions that are derived from the status field alone
    fixed_descriptions = {'1': 'OK', '0': 'Unplugged'}

    if port:
        entries = {
            port: state_db.get_all(state_db.STATE_DB, 'TRANSCEIVER_STATUS|{}'.format(port))
        }
    else:
        # Keys look like 'TRANSCEIVER_STATUS|<port>'; index the entries by port name
        entries = {
            db_key.split('|')[1]: state_db.get_all(state_db.STATE_DB, db_key)
            for db_key in state_db.keys(state_db.STATE_DB, 'TRANSCEIVER_STATUS|*')
        }

    rows = []
    for port_name in natsort.natsorted(entries):
        fields = entries[port_name]
        state = fields.get('status')
        description = fields.get('error')

        if state in fixed_descriptions:
            description = fixed_descriptions[state]
        elif description == 'N/A':
            # Neither plugged-in-OK nor unplugged, yet no error text was recorded
            log.log_error("Inconsistent state found for port {}: state is {} but error description is N/A".format(port_name, state))
            description = 'Unknown state: {}'.format(state)

        rows.append([port_name, description])

    return rows

@show.command()
@click.option('-p', '--port', metavar='<port_name>', help="Display SFP error status for port <port_name> only")
@click.option('-hw', '--fetch-from-hardware', 'fetch_from_hardware', is_flag=True, default=False, help="Fetch the error status from hardware directly")
def error_status(port, fetch_from_hardware):
    """Display error status of SFP transceiver(s)"""
    # NOTE: the docstring above is shown by click as the command help text.
    table_header = ["Port", "Error Status"]

    # Reject an unknown port name before querying anything
    if port and platform_sfputil.is_logical_port(port) == 0:
        click.echo("Error: invalid port '{}'\n".format(port))
        click.echo("Valid values for port: {}\n".format(str(platform_sfputil.logical)))
        sys.exit(ERROR_INVALID_PORT)

    if fetch_from_hardware:
        output_table = fetch_error_status_from_platform_api(port)
        if output_table is None:
            # None means the hardware query failed; report it instead of
            # handing None to tabulate
            click.echo("Error: failed to fetch error status from hardware", err=True)
            raise click.Abort()
    else:
        # Connect to STATE_DB
        state_db = SonicV2Connector(host='127.0.0.1')
        # NOTE(review): the constructor is not expected to return None; a failed
        # connection presumably surfaces as an exception from connect() - confirm
        if state_db is None:
            click.echo("Failed to connect to STATE_DB")
            return
        state_db.connect(state_db.STATE_DB)

        output_table = fetch_error_status_from_state_db(port, state_db)

    click.echo(tabulate(output_table, table_header, tablefmt='simple'))


# 'lpmode' subcommand
@show.command()
@click.option('-p', '--port', metavar='<port_name>', help="Display SFP low-power mode status for port <port_name> only")
Expand Down
27 changes: 27 additions & 0 deletions show/interfaces/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os

import subprocess
import click
import utilities_common.cli as clicommon
import utilities_common.multi_asic as multi_asic_util
Expand All @@ -10,6 +11,7 @@
from sonic_py_common import device_info
from swsscommon.swsscommon import ConfigDBConnector
from portconfig import get_child_ports
import sonic_platform_base.sonic_sfp.sfputilhelper

from . import portchannel
from collections import OrderedDict
Expand Down Expand Up @@ -396,6 +398,31 @@ def presence(db, interfacename, namespace, verbose):
clicommon.run_command(cmd, display_cmd=verbose)


@transceiver.command()
@click.argument('interfacename', required=False)
@click.option('--fetch-from-hardware', '-hw', 'fetch_from_hardware', is_flag=True, default=False)
@click.option('--namespace', '-n', 'namespace', default=None, show_default=True,
              type=click.Choice(multi_asic_util.multi_asic_ns_choices()), help='Namespace name or all')
@click.option('--verbose', is_flag=True, help="Enable verbose output")
@clicommon.pass_db
def error_status(db, interfacename, fetch_from_hardware, namespace, verbose):
    """ Show transceiver error-status """
    # This command only assembles and runs the matching `sfputil` command line;
    # `db` and `namespace` are accepted but not used here.
    ctx = click.get_current_context()

    cmd_parts = ["sudo sfputil show error-status"]

    if interfacename is not None:
        # Resolve the name through the alias helper before passing it on
        interfacename = try_convert_interfacename_from_alias(ctx, interfacename)
        cmd_parts.append(" -p {}".format(interfacename))

    if fetch_from_hardware:
        cmd_parts.append(" -hw")

    clicommon.run_command("".join(cmd_parts), display_cmd=verbose)


#
# counters group ("show interfaces counters ...")
#
Expand Down
16 changes: 16 additions & 0 deletions tests/mock_tables/state_db.json
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,22 @@
"txpowerlowalarm": "-10.5012",
"txpowerlowwarning": "-7.5007"
},
"TRANSCEIVER_STATUS|Ethernet0": {
"status": "67",
"error": "Blocking Error|High temperature"
},
"TRANSCEIVER_STATUS|Ethernet4": {
"status": "1",
"error": "N/A"
},
"TRANSCEIVER_STATUS|Ethernet8": {
"status": "0",
"error": "N/A"
},
"TRANSCEIVER_STATUS|Ethernet12": {
"status": "255",
"error": "N/A"
},
"CHASSIS_INFO|chassis 1": {
"psu_num": "2"
},
Expand Down
16 changes: 16 additions & 0 deletions tests/sfputil_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
import os
from unittest import mock

from .mock_tables import dbconnector

import pytest
from click.testing import CliRunner
from utilities_common.db import Db

test_path = os.path.dirname(os.path.abspath(__file__))
modules_path = os.path.dirname(test_path)
Expand Down Expand Up @@ -173,3 +176,16 @@ def test_version(self):
runner = CliRunner()
result = runner.invoke(sfputil.cli.commands['version'], [])
assert result.output.rstrip() == 'sfputil version {}'.format(sfputil.VERSION)

def test_error_status_from_db(self):
    """Check the rows built from the mocked TRANSCEIVER_STATUS entries, for all ports and for one port."""
    state_db = Db().db

    # One row per mocked entry: error text, status '1', status '0', unknown status with 'N/A'
    all_ports_expected = [
        ['Ethernet0', 'Blocking Error|High temperature'],
        ['Ethernet4', 'OK'],
        ['Ethernet8', 'Unplugged'],
        ['Ethernet12', 'Unknown state: 255'],
    ]
    all_ports_actual = sfputil.fetch_error_status_from_state_db(None, state_db)
    assert all_ports_actual == all_ports_expected

    # Querying a single port must yield only that port's row
    single_port_expected = all_ports_expected[:1]
    single_port_actual = sfputil.fetch_error_status_from_state_db('Ethernet0', state_db)
    assert single_port_actual == single_port_expected

0 comments on commit 38f8c06

Please sign in to comment.