From d655d20f9ae1b9918fc17317a203046299eba19a Mon Sep 17 00:00:00 2001 From: Ying Xie Date: Mon, 29 Oct 2018 16:00:09 -0700 Subject: [PATCH] [syncd] partial warm recovery support (#352) * [syncd] delay creating diag shell until after switch is created Signed-off-by: Ying Xie * [syncd] provide default SAI warmboot data file in case not specified Signed-off-by: Ying Xie * [syncd] move performWarmRestart to syncd_hard_reinit.cpp - Move function to get access to static local variables. - Improve the function according to the new SAI interaction requirements. - Remove 'TODO' and exception since the code is now working. Signed-off-by: Ying Xie * [syncd] restore previously changed warm boot condition Signed-off-by: Ying Xie * [syncd script] refactor syncd init script - move start type setting into a helper function Signed-off-by: Ying Xie * [syncd script] check and set warm start type WARM start takes precedence over fast start, because the fast boot option is a kernel command line option that will always be there after a fast reboot. Signed-off-by: Ying Xie * [syncd] comment out warm start option calculation for now redis-cli is not available in syncd docker. Some work/discussion/decision is required if we want to continue with the start option approach. 
Signed-off-by: Ying Xie * [syncd] take warm boot hint from database directly Signed-off-by: Ying Xie --- syncd/scripts/syncd_init_common.sh | 52 ++++++++++-------- syncd/syncd.cpp | 84 +++++++---------------------- syncd/syncd.h | 4 ++ syncd/syncd_hard_reinit.cpp | 86 +++++++++++++++++++++++++++++- 4 files changed, 137 insertions(+), 89 deletions(-) diff --git a/syncd/scripts/syncd_init_common.sh b/syncd/scripts/syncd_init_common.sh index cbc7b4c06fd4..f33cd76a5e6c 100755 --- a/syncd/scripts/syncd_init_common.sh +++ b/syncd/scripts/syncd_init_common.sh @@ -34,6 +34,31 @@ case "$(cat /proc/cmdline)" in esac +function check_warm_boot() +{ + # FIXME: if we want to continue with the start option approach, then we need to add + # code here to support redis database queries. + # SYSTEM_WARM_START=`/usr/bin/redis-cli -n 4 hget "WARM_RESTART|system" enable` + # SERVICE_WARM_START=`/usr/bin/redis-cli -n 4 hget "WARM_RESTART|${SERVICE}" enable` + # SYSTEM_WARM_START could be empty, always make WARM_BOOT meaningful. + # if [[ x"$SYSTEM_WARM_START" == x"true" ]] || [[ x"$SERVICE_WARM_START" == x"true" ]]; then + # WARM_BOOT="true" + # else + WARM_BOOT="false" + # fi +} + + +function set_start_type() +{ + if [ x"$WARM_BOOT" == x"true" ]; then + CMD_ARGS+=" -t warm" + elif [ x"$FAST_REBOOT" == x"yes" ]; then + CMD_ARGS+=" -t fast" + fi +} + + config_syncd_bcm() { if [ -f "/etc/sai.d/sai.profile" ]; then @@ -45,10 +70,6 @@ config_syncd_bcm() [ -e /dev/linux-bcm-knet ] || mknod /dev/linux-bcm-knet c 122 0 [ -e /dev/linux-user-bde ] || mknod /dev/linux-user-bde c 126 0 [ -e /dev/linux-kernel-bde ] || mknod /dev/linux-kernel-bde c 127 0 - - if [ $FAST_REBOOT == "yes" ]; then - CMD_ARGS+=" -t fast" - fi } config_syncd_mlnx() @@ -66,10 +87,6 @@ config_syncd_mlnx() # Write MAC address into /tmp/profile file. 
cat $HWSKU_DIR/sai.profile > /tmp/sai.profile echo "DEVICE_MAC_ADDRESS=$ALIGNED_MAC_ADDRESS" >> /tmp/sai.profile - - if [ $FAST_REBOOT == "yes" ]; then - CMD_ARGS+=" -t fast" - fi } config_syncd_centec() @@ -78,10 +95,6 @@ config_syncd_centec() [ -e /dev/linux_dal ] || mknod /dev/linux_dal c 198 0 [ -e /dev/net/tun ] || ( mkdir -p /dev/net && mknod /dev/net/tun c 10 200 ) - - if [ $FAST_REBOOT == "yes" ]; then - CMD_ARGS+=" -t fast" - fi } config_syncd_cavium() @@ -94,10 +107,6 @@ config_syncd_cavium() until [ $(redis-cli ping | grep -c PONG) -gt 0 ]; do sleep 1 done - - if [ $FAST_REBOOT == "yes" ]; then - CMD_ARGS+=" -t fast" - fi } config_syncd_marvell() @@ -120,22 +129,17 @@ config_syncd_barefoot() export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/bfn/install/lib/platform/$ONIE_PLATFORM:/opt/bfn/install/lib:/opt/bfn/install/lib/tofinopd/switch ./opt/bfn/install/bin/dma_setup.sh export LD_PRELOAD=libswitchapi.so:libswitchsai.so:libpd.so:libpdcli.so:libdriver.so:libbfsys.so:libbfutils.so:libbf_switchd_lib.so:libtofinopdfixed_thrift.so:libpdthrift.so - - if [ $FAST_REBOOT == "yes" ]; then - CMD_ARGS+=" -t fast" - fi } config_syncd_nephos() { CMD_ARGS+=" -p $HWSKU_DIR/sai.profile" - if [ $FAST_REBOOT == "yes" ]; then - CMD_ARGS+=" -t fast" - fi } config_syncd() { + check_warm_boot + if [ "$SONIC_ASIC_TYPE" == "broadcom" ]; then config_syncd_bcm elif [ "$SONIC_ASIC_TYPE" == "mellanox" ]; then @@ -155,6 +159,8 @@ config_syncd() exit 1 fi + set_start_type + if [ ${ENABLE_SAITHRIFT} == 1 ]; then CMD_ARGS+=" -r -m $HWSKU_DIR/port_config.ini" fi diff --git a/syncd/syncd.cpp b/syncd/syncd.cpp index 41137d3344e3..9d21731f66b5 100644 --- a/syncd/syncd.cpp +++ b/syncd/syncd.cpp @@ -5,6 +5,8 @@ #include "swss/tokenize.h" #include +#include "swss/warm_restart.h" + extern "C" { #include } @@ -13,6 +15,8 @@ extern "C" { #include #include +#define DEF_SAI_WARM_BOOT_DATA_FILE "/var/warmboot/sai-warmboot.bin" + /** * @brief Global mutex for thread synchronization * @@ -3059,6 +3063,13 
@@ void handleProfileMap(const std::string& profileMapFile) exit(EXIT_FAILURE); } + // Provide default value at boot up time and let sai profile value + // Override following values if existing. + // SAI reads these values at start up time. It would be too late to + // set these values later when WARM BOOT is detected. + gProfileMap[SAI_KEY_WARM_BOOT_WRITE_FILE] = DEF_SAI_WARM_BOOT_DATA_FILE; + gProfileMap[SAI_KEY_WARM_BOOT_READ_FILE] = DEF_SAI_WARM_BOOT_DATA_FILE; + std::string line; while(getline(profile, line)) @@ -3288,61 +3299,6 @@ void set_sai_api_log_min_prio(const std::string &prioStr) } } -void performWarmRestart() -{ - SWSS_LOG_ENTER(); - - /* - * There should be no case when we are doing warm restart and there is no - * switch defined, we will throw at sucha case. - * - * This case could be possible when no switches were created and only api - * was initialized, but we will skip this scenario and address is when we - * will have need for it. - */ - - auto entries = g_redisClient->keys(ASIC_STATE_TABLE + std::string(":SAI_OBJECT_TYPE_SWITCH:*")); - - if (entries.size() == 0) - { - SWSS_LOG_THROW("on warm restart there is no switches defined in DB, not supported yet, FIXME"); - } - - if (entries.size() != 1) - { - SWSS_LOG_THROW("multiple switches defined in warm start: %zu, not supported yet, FIXME", entries.size()); - } - - /* - * Here wa have only one switch defined, let's extract his vid and rid. - */ - - /* - * Entry should be in format ASIC_STATE:SAI_OBJECT_TYPE_SWITCH:oid:0xYYYY - * - * Let's extract oid value - */ - - std::string key = entries.at(0); - - auto start = key.find_first_of(":") + 1; - auto end = key.find(":", start); - - std::string strSwitchVid = key.substr(end + 1); - - sai_object_id_t switch_vid; - - sai_deserialize_object_id(strSwitchVid, switch_vid); - - sai_object_id_t switch_rid = translate_vid_to_rid(switch_vid); - - /* - * Perform all get operations on existing switch. 
- */ - - switches[switch_vid] = std::make_shared(switch_vid, switch_rid); -} - void onSyncdStart(bool warmStart) { SWSS_LOG_ENTER(); @@ -3377,14 +3333,6 @@ void onSyncdStart(bool warmStart) performWarmRestart(); SWSS_LOG_NOTICE("skipping hard reinit since WARM start was performed"); - - // TODO issue here can be that in hard start there was 8 queues then - // user added 2, and we have 10, after warm restart, switch will - // discover 10 queus, and mark them as "non removable" but 2 of them - // can be removed. We would probably need to store all objects after - // hard reinit and treat that as base. - - SWSS_LOG_THROW("warm restart is not yet fully supported and needs to be revisited"); return; } @@ -3466,6 +3414,9 @@ int syncd_main(int argc, char **argv) swss::Logger::linkToDbNative("syncd"); + swss::WarmStart::initialize("syncd", "syncd"); + swss::WarmStart::checkWarmStart("syncd"); + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" sai_metadata_log = &sai_meta_log_syncd; @@ -3506,6 +3457,11 @@ int syncd_main(int argc, char **argv) g_veryFirstRun = isVeryFirstRun(); + if (swss::WarmStart::isWarmStart()) + { + options.startType = SAI_WARM_BOOT; + } + if (options.startType == SAI_WARM_BOOT) { const char *warmBootReadFile = profile_get_value(0, SAI_KEY_WARM_BOOT_READ_FILE); @@ -3572,7 +3528,7 @@ int syncd_main(int argc, char **argv) try { SWSS_LOG_NOTICE("before onSyncdStart"); - onSyncdStart(false); + onSyncdStart(options.startType == SAI_WARM_BOOT); SWSS_LOG_NOTICE("after onSyncdStart"); startNotificationsProcessingThread(); diff --git a/syncd/syncd.h b/syncd/syncd.h index f23562652e4e..01f720426c82 100644 --- a/syncd/syncd.h +++ b/syncd/syncd.h @@ -67,6 +67,10 @@ void startDiagShell(); void hardReinit(); +void performWarmRestart(); + +sai_object_id_t translate_vid_to_rid(_In_ sai_object_id_t vid); + void redisClearVidToRidMap(); void redisClearRidToVidMap(); diff --git a/syncd/syncd_hard_reinit.cpp 
b/syncd/syncd_hard_reinit.cpp index 58b8ac67b903..8d28cc636603 100644 --- a/syncd/syncd_hard_reinit.cpp +++ b/syncd/syncd_hard_reinit.cpp @@ -445,8 +445,6 @@ void processSwitches() g_translatedV2R[switch_vid] = switch_rid; g_translatedR2V[switch_rid] = switch_vid; - startDiagShell(); - auto sw = switches[switch_vid] = std::make_shared<SaiSwitch>(switch_vid, switch_rid); /* @@ -458,6 +456,8 @@ void processSwitches() g_sw = sw; + startDiagShell(); + /* * We processed switch. We have switch vid/rid so we can process all * other attributes of switches that are not mandatory on create and are @@ -1203,3 +1203,85 @@ void hardReinit() checkAllIds(); } + +void performWarmRestart() +{ + SWSS_LOG_ENTER(); + + /* + * There should be no case when we are doing warm restart and there is no + * switch defined, we will throw in such a case. + * + * This case could be possible when no switches were created and only api + * was initialized, but we will skip this scenario and address it when we + * will have need for it. + */ + + auto entries = g_redisClient->keys(ASIC_STATE_TABLE + std::string(":SAI_OBJECT_TYPE_SWITCH:*")); + + if (entries.size() == 0) + { + SWSS_LOG_THROW("on warm restart there is no switches defined in DB, not supported yet, FIXME"); + } + + if (entries.size() != 1) + { + SWSS_LOG_THROW("multiple switches defined in warm start: %zu, not supported yet, FIXME", entries.size()); + } + + /* + * Here we have only one switch defined, let's extract its vid and rid. 
+ */ + + /* + * Entry should be in format ASIC_STATE:SAI_OBJECT_TYPE_SWITCH:oid:0xYYYY + * + * Let's extract oid value + */ + + std::string key = entries.at(0); + + auto start = key.find_first_of(":") + 1; + auto end = key.find(":", start); + + std::string strSwitchVid = key.substr(end + 1); + + sai_object_id_t switch_vid; + + sai_deserialize_object_id(strSwitchVid, switch_vid); + + sai_object_id_t orig_rid = translate_vid_to_rid(switch_vid); + + sai_object_id_t switch_rid; + sai_attribute_t switch_attr; + switch_attr.id = SAI_SWITCH_ATTR_INIT_SWITCH; + switch_attr.value.booldata = true; + sai_status_t status = sai_metadata_sai_switch_api->create_switch(&switch_rid, 1, &switch_attr); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_THROW("failed to create switch RID: %s", + sai_serialize_status(status).c_str()); + } + if (orig_rid != switch_rid) + { + SWSS_LOG_THROW("Unexpected RID 0x%llx (expected 0x%llx)", + static_cast<unsigned long long>(switch_rid), static_cast<unsigned long long>(orig_rid)); + } + + g_translatedV2R[switch_vid] = switch_rid; + g_translatedR2V[switch_rid] = switch_vid; + + /* + * Perform all get operations on existing switch. + */ + + auto sw = switches[switch_vid] = std::make_shared<SaiSwitch>(switch_vid, switch_rid); + + g_switch_rid = switch_rid; + g_switch_vid = switch_vid; + + g_sw = sw; + + startDiagShell(); +}