From a72e4073fadf4c27ba2dca3a825ad64e48ddf6f2 Mon Sep 17 00:00:00 2001 From: Vaibhav Hemant Dixit Date: Wed, 25 Aug 2021 08:59:02 -0700 Subject: [PATCH] [202012][fast-reboot] Remove FLEX_COUNTER_TABLE from config_db.json before reboot (#1774) Remove FLEX_COUNTER_TABLE from config_db.json before fast-reboot to allow delaying FLEX counter polling after fast-reboot. Delaying FLEX counter polling is important to keep fastboot dataplane downtime under 30s. In the going down path, add a step to modify config_db.json - remove the key:value for FLEX_COUNTER_TABLE table. Repro'd the issue in the latest 202012 image. With the fix, the counter polling is delayed and downtime is back to normal. --- scripts/fast-reboot | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/fast-reboot b/scripts/fast-reboot index 38f1de96c1..23b98a34cd 100755 --- a/scripts/fast-reboot +++ b/scripts/fast-reboot @@ -20,6 +20,7 @@ PLATFORM=$(sonic-cfggen -H -v DEVICE_METADATA.localhost.platform) PLATFORM_PLUGIN="${REBOOT_TYPE}_plugin" LOG_SSD_HEALTH="/usr/local/bin/log_ssd_health" SSD_FW_UPDATE="ssd-fw-upgrade" +CONFIG_DB_FILE="/etc/sonic/config_db.json" TAG_LATEST=yes # Require 100M available on the hard drive for warm reboot temp files, @@ -512,7 +513,6 @@ if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then # Dump the ARP and FDB tables to files also as default routes for both IPv4 and IPv6 # into /host/fast-reboot DUMP_DIR=/host/fast-reboot - CONFIG_DB_FILE=/etc/sonic/config_db.json mkdir -p $DUMP_DIR FAST_REBOOT_DUMP_RC=0 /usr/local/bin/fast-reboot-dump.py -t $DUMP_DIR || FAST_REBOOT_DUMP_RC=$? @@ -524,7 +524,7 @@ if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then FILTER_FDB_ENTRIES_RC=0 # Filter FDB entries using MAC addresses from ARP table - /usr/local/bin/filter_fdb_entries -f $DUMP_DIR/fdb.json -a $DUMP_DIR/arp.json -c $CONFIG_DB_FILE || FILTER_FDB_ENTRIES_RC=$? 
+ /usr/local/bin/filter_fdb_entries -f $DUMP_DIR/fdb.json -a $DUMP_DIR/arp.json -c ${CONFIG_DB_FILE} || FILTER_FDB_ENTRIES_RC=$? if [[ FILTER_FDB_ENTRIES_RC -ne 0 ]]; then error "Failed to filter FDb entries. Exit code: $FILTER_FDB_ENTRIES_RC" unload_kernel @@ -679,6 +679,14 @@ then systemctl stop "$service_name" fi +if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then + # Remove FLEX_COUNTER_TABLE from config_db.json + # This is done so that in the fast-reboot recovery path, FLEX_COUNTER polling is delayed. + # Delayed FLEX_COUNTER polling is an attempt to keep dataplane downtime below the 30s threshold + jq --indent 4 'del(.FLEX_COUNTER_TABLE)' ${CONFIG_DB_FILE} > ${CONFIG_DB_FILE}.new + mv ${CONFIG_DB_FILE}.new ${CONFIG_DB_FILE} +fi + # Update the reboot cause file to reflect that user issued this script # Upon next boot, the contents of this file will be used to determine the # cause of the previous reboot