-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[201911] DellEMC S6100 SSD Monitor (#6934)
Why I did it: To monitor the SSD health condition on the DellEMC S6100 platform after upgrade. A daemon is introduced that monitors the SSD once every hour, and the SSD status is also checked at boot time and at the time of a cold reboot. All these changes are supported only for newer SSD firmware. Added a platform_reboot_pre_check script to prevent cold reboot based on SSD status. Depends on sonic-net/sonic-utilities#1472. DO NOT MERGE UNTIL THE ABOVE PR IS MERGED.
- Loading branch information
1 parent
9b553d9
commit 140576d
Showing
9 changed files
with
208 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
28 changes: 28 additions & 0 deletions
28
platform/broadcom/sonic-platform-modules-dell/s6100/scripts/platform_reboot_pre_check
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash
#
# platform_reboot_pre_check - decide whether a cold reboot may proceed on a
# Dell S6100, based on the SSD status markers kept under /host/ssd_fw_upgrade
# and on the live GPIO status reported by iSMART_64.
#
# Exit status:
#   0  GPIO7_high marker is present and iSMART reports GPIO status 0x01
#   1  every other case (a blocking marker is present, the GPIO status is not
#      0x01 or cannot be read, or no marker exists at all)
#
# NOTE(review): the caller is presumably the reboot utility, treating a
# non-zero status as a veto - confirm against sonic-utilities.

SSD_FW_UPGRADE="/host/ssd_fw_upgrade"

# Emit the two critical syslog lines explaining why the cold reboot is refused.
log_cold_reboot_refused() {
    logger -p user.crit -t DELL_S6100_SSD_MON "The SSD on this unit is faulty and does not support cold reboot."
    logger -p user.crit -t DELL_S6100_SSD_MON "Please perform a soft-/fast-/warm-reboot instead"
}

# Check SSD Status
# The pending-upgrade marker is named GPIO7_pending_upgrade (that is the name
# the upgrade-status script creates); checking any other name never matches.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_low" \
   || -e "$SSD_FW_UPGRADE/GPIO7_error" \
   || -e "$SSD_FW_UPGRADE/GPIO7_pending_upgrade" ]]; then
    log_cold_reboot_refused
    exit 1
fi

if [[ -e "$SSD_FW_UPGRADE/GPIO7_high" ]]; then
    iSMART="/usr/local/bin/iSMART_64"
    iSMART_OPTIONS=(-d /dev/sda)

    iSMART_CMD=$("$iSMART" "${iSMART_OPTIONS[@]}")

    # Last field of the line mentioning GPIO in the iSMART report.
    GPIO_STATUS=$(echo "$iSMART_CMD" | grep GPIO | awk '{print $NF}')

    # Quoted comparison: an empty/unreadable status is simply "not 0x01" and
    # is refused, instead of triggering a test syntax error.
    if [[ "$GPIO_STATUS" == "0x01" ]]; then
        exit 0
    fi

    log_cold_reboot_refused
    exit 1
fi

# No status marker at all: refuse the cold reboot.
exit 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
21 changes: 21 additions & 0 deletions
21
platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_ssd_mon.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash
#
# s6100_ssd_mon.sh - periodic SSD health poll for the Dell S6100.
#
# While the GPIO7_high marker exists under /host/ssd_fw_upgrade, query
# iSMART_64 for the GPIO status. If the status is anything other than 0x01
# (including empty/unreadable output), log the fault, replace the GPIO7_*
# markers with GPIO7_low and stop the monitor timer. If GPIO7_high is absent
# there is nothing to poll, so the timer is stopped as well.

SSD_FW_UPGRADE="/host/ssd_fw_upgrade"

if [[ -e "$SSD_FW_UPGRADE/GPIO7_high" ]]; then
    iSMART="/usr/local/bin/iSMART_64"
    iSMART_OPTIONS=(-d /dev/sda)

    iSMART_CMD=$("$iSMART" "${iSMART_OPTIONS[@]}")

    # Last field of the line mentioning GPIO in the iSMART report.
    GPIO_STATUS=$(echo "$iSMART_CMD" | grep GPIO | awk '{print $NF}')

    # Quoted on purpose: an unquoted empty value makes the test a syntax
    # error that evaluates false, i.e. an unreadable status would be treated
    # as healthy. Quoted, an empty value counts as "not 0x01" (faulty), which
    # matches how platform_reboot_pre_check treats it.
    if [[ "$GPIO_STATUS" != "0x01" ]]; then
        logger -p user.crit -t DELL_S6100_SSD_MON "The SSD on this unit is faulty and does not support cold reboot."
        logger -p user.crit -t DELL_S6100_SSD_MON "If a reboot is required, please perform a soft-/fast-/warm-reboot."
        rm -rf -- "$SSD_FW_UPGRADE"/GPIO7_*
        touch "$SSD_FW_UPGRADE/GPIO7_low"
        systemctl stop s6100-ssd-monitor.timer
    fi
else
    systemctl stop s6100-ssd-monitor.timer
fi
112 changes: 112 additions & 0 deletions
112
platform/broadcom/sonic-platform-modules-dell/s6100/scripts/s6100_ssd_upgrade_status.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/bin/bash
#
# s6100_ssd_upgrade_status.sh - classify the outcome of the S6100 SSD firmware
# upgrade and record it as a marker file under /host/ssd_fw_upgrade:
#
#   GPIO7_high             firmware upgraded and iSMART GPIO status is 0x01
#   GPIO7_low              SSD considered faulty (GPIO status not 0x01, or the
#                          first upgrade-status register reads 1)
#   GPIO7_error            an upgrade-status register reads 2
#   GPIO7_pending_upgrade  both upgrade-status registers read ff
#
# Details are appended to /host/ssd_fw_upgrade/upgrade.log, and the two
# upgrade-status registers are cleared at the end.

SSD_FW_UPGRADE="/host/ssd_fw_upgrade"
SSD_UPGRADE_LOG="$SSD_FW_UPGRADE/upgrade.log"

# Append one line, prefixed with the script name and the current date, to the
# upgrade log.
upgrade_log() {
    echo "$0 $(date) $*" >> "$SSD_UPGRADE_LOG"
}

# Replace whatever GPIO7_* marker exists with the single marker named in $1.
set_ssd_marker() {
    rm -rf -- "$SSD_FW_UPGRADE"/GPIO7_*
    touch "$SSD_FW_UPGRADE/$1"
}

# Emit the two critical syslog lines used when the SSD is considered faulty.
log_ssd_faulty() {
    logger -p user.crit -t DELL_S6100_SSD_MON "The SSD on this unit is faulty and does not support reboot."
    logger -p user.crit -t DELL_S6100_SSD_MON "If a reboot is required, please perform a soft-/fast-/warm-reboot."
}

# Write 06 to offset 210 and $1 to offset 211, then read offset 212.
# NOTE(review): offsets 210/211 presumably select an SMF register that is then
# read through offset 212 - confirm against the SMF register map.
read_smf_status() {
    io_rd_wr.py --set --val 06 --offset 210
    io_rd_wr.py --set --val "$1" --offset 211
    io_rd_wr.py --get --offset 212
}

# Write 06 to offset 210 and $1 to offset 211, then write ff to offset 213.
clear_smf_status() {
    io_rd_wr.py --set --val 06 --offset 210
    io_rd_wr.py --set --val "$1" --offset 211
    io_rd_wr.py --set --val ff --offset 213
}

# Healthy state already recorded: just make sure the periodic monitor runs.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_high" ]]; then
    systemctl start --no-block s6100-ssd-monitor.timer
    exit 0
fi

# Faulty/error state already recorded: nothing more to evaluate.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_low" || -e "$SSD_FW_UPGRADE/GPIO7_error" ]]; then
    exit 0
fi

[[ -d "$SSD_FW_UPGRADE" ]] || mkdir "$SSD_FW_UPGRADE"

SMART_CMD=$(smartctl -a /dev/sda)

SSD_FW_VERSION=$(echo "$SMART_CMD" | grep "Firmware Version" | awk '{print $NF}')
SSD_MODEL=$(echo "$SMART_CMD" | grep "Device Model" | awk '{print $NF}')

# All comparisons below are quoted so that an empty value (tool missing or
# unexpected output) compares as a plain string instead of raising a test
# syntax error.
if [[ -e "$SSD_FW_UPGRADE/GPIO7_pending_upgrade" ]]; then
    if [[ "$SSD_FW_VERSION" == "S141002C" || "$SSD_FW_VERSION" == "S16425c1" ]]; then
        # If SSD Firmware is not upgraded
        exit 0
    fi
fi

upgrade_log "SSD FW upgrade logs post reboot."

iSMART="/usr/local/bin/iSMART_64"
iSMART_OPTIONS=(-d /dev/sda)
iSMART_CMD=$("$iSMART" "${iSMART_OPTIONS[@]}")

# Upgrade-status registers: selector 09 for the first mp_64 run, 0A for the
# second (as named in the log messages below).
SSD_UPGRADE_STATUS1=$(read_smf_status 09 | awk '{print $NF}')
SSD_UPGRADE_STATUS2=$(read_smf_status 0A | awk '{print $NF}')

if [[ "$SSD_UPGRADE_STATUS1" == "2" ]]; then
    set_ssd_marker GPIO7_error
    upgrade_log "Upgraded to unknown version after first mp_64 upgrade."

elif [[ "$SSD_UPGRADE_STATUS2" == "2" ]]; then
    set_ssd_marker GPIO7_error
    upgrade_log "Upgraded to unknown version after second mp_64 upgrade."

elif [[ "$SSD_FW_VERSION" == "S141002G" || "$SSD_FW_VERSION" == "S16425cG" ]]; then
    # If SSD Firmware is upgraded
    GPIO_STATUS=$(echo "$iSMART_CMD" | grep GPIO | awk '{print $NF}')

    # An empty/unreadable GPIO status counts as "not 0x01": the unit is
    # marked faulty rather than healthy.
    if [[ "$GPIO_STATUS" != "0x01" ]]; then
        log_ssd_faulty
        set_ssd_marker GPIO7_low
        upgrade_log "The SSD on this unit is faulty and does not support cold reboot."
        upgrade_log "If a reboot is required, please perform a soft-/fast-/warm-reboot."
    else
        set_ssd_marker GPIO7_high
    fi

    systemctl start --no-block s6100-ssd-monitor.timer

    if [[ "$SSD_UPGRADE_STATUS1" == "0" ]]; then
        if [[ "$SSD_MODEL" == "3IE" ]]; then
            upgrade_log "SSD FW upgraded from S141002C to S141002G in first mp_64."
        else
            upgrade_log "SSD FW upgraded from S16425c1 to S16425cG in first mp_64."
        fi
    elif [[ "$SSD_UPGRADE_STATUS2" == "1" ]]; then
        upgrade_log "SSD entered loader mode in first mp_64 and upgraded to latest version after second mp_64."
    fi

else
    if [[ "$SSD_UPGRADE_STATUS1" == "ff" && "$SSD_UPGRADE_STATUS2" == "ff" ]]; then
        set_ssd_marker GPIO7_pending_upgrade
        upgrade_log "SSD upgrade didn’t happened."

    elif [[ "$SSD_UPGRADE_STATUS1" == "1" ]]; then
        set_ssd_marker GPIO7_low
        log_ssd_faulty
        upgrade_log "SSD entered loader mode in first mp_64 upgrade."

        if [[ "$SSD_UPGRADE_STATUS2" == "0" ]]; then
            upgrade_log "SSD entered loader mode in first mp_64 and recovered back to older version in second mp_64."
        fi
    fi
fi

upgrade_log "SMF Register 1 = $SSD_UPGRADE_STATUS1"
upgrade_log "SMF Register 2 = $SSD_UPGRADE_STATUS2"
echo "$SMART_CMD" >> "$SSD_UPGRADE_LOG"
echo "$iSMART_CMD" >> "$SSD_UPGRADE_LOG"
sync
# Clearing the upgrade status
clear_smf_status 09
clear_smf_status 0A
12 changes: 12 additions & 0 deletions
12
platform/broadcom/sonic-platform-modules-dell/s6100/systemd/s6100-ssd-monitor.service
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Service unit that runs the S6100 SSD monitor script
# (/usr/local/bin/s6100_ssd_mon.sh) as root, once per activation.
# NOTE(review): presumably activated by s6100-ssd-monitor.timer (same unit
# name, and that timer sets no explicit Unit=) - confirm.
[Unit] | ||
Description=Dell S6100 SSD monitoring poller | ||
# Opt out of systemd's implicit default dependencies for this unit.
DefaultDependencies=no | ||
|
||
[Service] | ||
User=root | ||
ExecStart=/usr/local/bin/s6100_ssd_mon.sh | ||
# The unit is not kept "active" after the script exits.
RemainAfterExit=no | ||
|
||
[Install] | ||
WantedBy=multi-user.target | ||
|
12 changes: 12 additions & 0 deletions
12
platform/broadcom/sonic-platform-modules-dell/s6100/systemd/s6100-ssd-monitor.timer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Timer unit for the S6100 SSD monitor poller.
# No Unit= is set, so systemd activates the unit with the matching name
# (s6100-ssd-monitor.service).
[Unit] | ||
Description=Dell S6100 SSD monitoring poller timer | ||
DefaultDependencies=no | ||
# Ordered after pmon.service when both are started.
After=pmon.service | ||
|
||
[Timer] | ||
# First run 5 minutes after boot.
OnBootSec=5min | ||
# Subsequent runs 60 minutes after the activated unit was last started.
OnUnitActiveSec=60min | ||
|
||
[Install] | ||
WantedBy=timers.target | ||
|
14 changes: 14 additions & 0 deletions
14
platform/broadcom/sonic-platform-modules-dell/s6100/systemd/s6100-ssd-upgrade-status.service
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# One-shot service that runs the S6100 SSD upgrade-status script
# (/usr/local/bin/s6100_ssd_upgrade_status.sh) as root.
[Unit] | ||
Description= Checking Dell S6100 SSD upgrade status | ||
# Ordered after pmon.service when both are started.
After=pmon.service | ||
DefaultDependencies=no | ||
|
||
[Service] | ||
User=root | ||
# oneshot: systemd waits for the script to exit before considering the unit
# started.
Type=oneshot | ||
ExecStart=/usr/local/bin/s6100_ssd_upgrade_status.sh | ||
# The unit is not kept "active" after the script exits.
RemainAfterExit=no | ||
|
||
[Install] | ||
WantedBy=multi-user.target | ||
|