From 7e3b2c6303d6fb2ec77cc498a7fb86bce43d09a0 Mon Sep 17 00:00:00 2001 From: nkelapur Date: Sat, 9 Jan 2021 01:21:52 +0530 Subject: [PATCH] [Evpn Warmreboot] Added Dependency check logic in VrfMgr (#1466) * [Evpn Warmreboot] Added Dependency check logic in VrfMgr This was done to ensure for EVPN warm-reboot the order of data replay to kernel is maintained across various submodules and the kernel programming will be successful. * Marking Vrfmgrd and Intfmgrd to reconcile immediately after replay There is no reconcile operation required in vrfmgrd and intfmgrd, hence immediately after replay these are marked as reconciled --- cfgmgr/intfmgr.cpp | 49 +++++++++++++++++++++++++++++++++++++++ cfgmgr/intfmgr.h | 3 +++ cfgmgr/vrfmgr.cpp | 5 ++++ cfgmgr/vrfmgrd.cpp | 12 ++++++++++ tests/test_warm_reboot.py | 4 ++-- 5 files changed, 71 insertions(+), 2 deletions(-) diff --git a/cfgmgr/intfmgr.cpp b/cfgmgr/intfmgr.cpp index daa25872a199..34b82d3c060c 100644 --- a/cfgmgr/intfmgr.cpp +++ b/cfgmgr/intfmgr.cpp @@ -24,6 +24,10 @@ using namespace swss; IntfMgr::IntfMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, const vector &tableNames) : Orch(cfgDb, tableNames), + m_cfgIntfTable(cfgDb, CFG_INTF_TABLE_NAME), + m_cfgVlanIntfTable(cfgDb, CFG_VLAN_INTF_TABLE_NAME), + m_cfgLagIntfTable(cfgDb, CFG_LAG_INTF_TABLE_NAME), + m_cfgLoopbackIntfTable(cfgDb, CFG_LOOPBACK_INTERFACE_TABLE_NAME), m_statePortTable(stateDb, STATE_PORT_TABLE_NAME), m_stateLagTable(stateDb, STATE_LAG_TABLE_NAME), m_stateVlanTable(stateDb, STATE_VLAN_TABLE_NAME), @@ -34,6 +38,12 @@ IntfMgr::IntfMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, c if (!WarmStart::isWarmStart()) { flushLoopbackIntfs(); + WarmStart::setWarmStartState("intfmgrd", WarmStart::WSDISABLED); + } + else + { + //Build the interface list to be replayed to Kernel + buildIntfReplayList(); } } @@ -172,6 +182,25 @@ int IntfMgr::getIntfIpCount(const string &alias) return std::stoi(res); } +void 
IntfMgr::buildIntfReplayList(void) +{ + vector intfList; + + m_cfgIntfTable.getKeys(intfList); + std::copy( intfList.begin(), intfList.end(), std::inserter( m_pendingReplayIntfList, m_pendingReplayIntfList.end() ) ); + + m_cfgLoopbackIntfTable.getKeys(intfList); + std::copy( intfList.begin(), intfList.end(), std::inserter( m_pendingReplayIntfList, m_pendingReplayIntfList.end() ) ); + + m_cfgVlanIntfTable.getKeys(intfList); + std::copy( intfList.begin(), intfList.end(), std::inserter( m_pendingReplayIntfList, m_pendingReplayIntfList.end() ) ); + + m_cfgLagIntfTable.getKeys(intfList); + std::copy( intfList.begin(), intfList.end(), std::inserter( m_pendingReplayIntfList, m_pendingReplayIntfList.end() ) ); + + SWSS_LOG_INFO("Found %d Total Intfs to be replayed", (int)m_pendingReplayIntfList.size() ); +} + bool IntfMgr::isIntfCreated(const string &alias) { vector temp; @@ -676,6 +705,7 @@ bool IntfMgr::doIntfAddrTask(const vector& keys, void IntfMgr::doTask(Consumer &consumer) { SWSS_LOG_ENTER(); + static bool replayDone = false; auto it = consumer.m_toSync.begin(); while (it != consumer.m_toSync.end()) @@ -693,6 +723,11 @@ void IntfMgr::doTask(Consumer &consumer) it++; continue; } + else + { + //Entry programmed, remove it from pending list if present + m_pendingReplayIntfList.erase(keys[0]); + } } else if (keys.size() == 2) { @@ -701,6 +736,11 @@ void IntfMgr::doTask(Consumer &consumer) it++; continue; } + else + { + //Entry programmed, remove it from pending list if present + m_pendingReplayIntfList.erase(keys[0] + config_db_key_delimiter + keys[1] ); + } } else { @@ -709,4 +749,13 @@ void IntfMgr::doTask(Consumer &consumer) it = consumer.m_toSync.erase(it); } + + if (!replayDone && WarmStart::isWarmStart() && m_pendingReplayIntfList.empty() ) + { + replayDone = true; + WarmStart::setWarmStartState("intfmgrd", WarmStart::REPLAYED); + // There is no operation to be performed for intfmgr reconcillation + // Hence mark it reconciled right away + 
WarmStart::setWarmStartState("intfmgrd", WarmStart::RECONCILED); + } } diff --git a/cfgmgr/intfmgr.h b/cfgmgr/intfmgr.h index b5ba6b15b21d..35d62a424d0e 100644 --- a/cfgmgr/intfmgr.h +++ b/cfgmgr/intfmgr.h @@ -19,10 +19,12 @@ class IntfMgr : public Orch private: ProducerStateTable m_appIntfTableProducer; + Table m_cfgIntfTable, m_cfgVlanIntfTable, m_cfgLagIntfTable, m_cfgLoopbackIntfTable; Table m_statePortTable, m_stateLagTable, m_stateVlanTable, m_stateVrfTable, m_stateIntfTable; std::set m_subIntfList; std::set m_loopbackIntfList; + std::set m_pendingReplayIntfList; void setIntfIp(const std::string &alias, const std::string &opCmd, const IpPrefix &ipPrefix); void setIntfVrf(const std::string &alias, const std::string &vrfName); @@ -36,6 +38,7 @@ class IntfMgr : public Orch bool isIntfCreated(const std::string &alias); bool isIntfChangeVrf(const std::string &alias, const std::string &vrfName); int getIntfIpCount(const std::string &alias); + void buildIntfReplayList(void); void addLoopbackIntf(const std::string &alias); void delLoopbackIntf(const std::string &alias); diff --git a/cfgmgr/vrfmgr.cpp b/cfgmgr/vrfmgr.cpp index d9164f47c51e..06c2a7b8cde3 100644 --- a/cfgmgr/vrfmgr.cpp +++ b/cfgmgr/vrfmgr.cpp @@ -102,6 +102,11 @@ VrfMgr::VrfMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, con << IP_CMD << " -6 rule add pref " << TABLE_LOCAL_PREF << " table local && " << IP_CMD << " -6 rule del pref 0"; EXEC_WITH_ERROR_THROW(cmd.str(), res); } + + if (!WarmStart::isWarmStart()) + { + WarmStart::setWarmStartState("vrfmgrd", WarmStart::WSDISABLED); + } } uint32_t VrfMgr::getFreeTable(void) diff --git a/cfgmgr/vrfmgrd.cpp b/cfgmgr/vrfmgrd.cpp index 2ecdf7968f25..af8e78bce8bf 100644 --- a/cfgmgr/vrfmgrd.cpp +++ b/cfgmgr/vrfmgrd.cpp @@ -35,6 +35,7 @@ mutex gDbMutex; int main(int argc, char **argv) { Logger::linkToDbNative("vrfmgrd"); + bool isWarmStart = false; SWSS_LOG_ENTER(); SWSS_LOG_NOTICE("--- Starting vrfmgrd ---"); @@ -56,6 +57,8 @@ int main(int argc, 
char **argv) VrfMgr vrfmgr(&cfgDb, &appDb, &stateDb, cfg_vrf_tables); + isWarmStart = WarmStart::isWarmStart(); + // TODO: add tables in stateDB which interface depends on to monitor list std::vector cfgOrchList = {&vrfmgr}; @@ -69,6 +72,7 @@ int main(int argc, char **argv) while (true) { Selectable *sel; + static bool firstReadTimeout = true; int ret; ret = s.select(&sel, SELECT_TIMEOUT); @@ -80,6 +84,14 @@ int main(int argc, char **argv) if (ret == Select::TIMEOUT) { vrfmgr.doTask(); + if (isWarmStart && firstReadTimeout) + { + firstReadTimeout = false; + WarmStart::setWarmStartState("vrfmgrd", WarmStart::REPLAYED); + // There is no operation to be performed for vrfmgrd reconcillation + // Hence mark it reconciled right away + WarmStart::setWarmStartState("vrfmgrd", WarmStart::RECONCILED); + } continue; } diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index 9cce686c99bc..a7a567fb6f64 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -46,7 +46,7 @@ def swss_check_RestoreCount(dvs, state_db, restore_count): if fv[0] == "restore_count": assert int(fv[1]) == restore_count[key] + 1 elif fv[0] == "state": - assert fv[1] == "reconciled" + assert fv[1] == "reconciled" or fv[1] == "disabled" def check_port_oper_status(appl_db, port_name, state): portTbl = swsscommon.Table(appl_db, swsscommon.APP_PORT_TABLE_NAME) @@ -76,7 +76,7 @@ def swss_app_check_RestoreCount_single(state_db, restore_count, name): if fv[0] == "restore_count": assert int(fv[1]) == restore_count[key] + 1 elif fv[0] == "state": - assert fv[1] == "reconciled" + assert fv[1] == "reconciled" or fv[1] == "disabled" def swss_app_check_warmstart_state(state_db, name, state): warmtbl = swsscommon.Table(state_db, swsscommon.STATE_WARM_RESTART_TABLE_NAME)