diff --git a/orchagent/fgnhgorch.cpp b/orchagent/fgnhgorch.cpp index 4e03c8462ce3..051f59d51e9b 100644 --- a/orchagent/fgnhgorch.cpp +++ b/orchagent/fgnhgorch.cpp @@ -6,6 +6,7 @@ #include "swssnet.h" #include "crmorch.h" #include +#include #define LINK_DOWN 0 #define LINK_UP 1 @@ -20,7 +21,7 @@ extern RouteOrch *gRouteOrch; extern CrmOrch *gCrmOrch; extern PortsOrch *gPortsOrch; -FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : +FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : Orch(db, tableNames), m_neighOrch(neighOrch), m_intfsOrch(intfsOrch), @@ -105,6 +106,39 @@ void FgNhgOrch::update(SubjectType type, void *cntx) } } +bool FgNhgOrch::bake() +{ + SWSS_LOG_ENTER(); + + deque entries; + vector keys; + m_stateWarmRestartRouteTable.getKeys(keys); + + SWSS_LOG_NOTICE("Warm reboot: recovering entry %lu from state", keys.size()); + + for (const auto &key : keys) + { + vector tuples; + m_stateWarmRestartRouteTable.get(key, tuples); + + NextHopIndexMap nhop_index_map(tuples.size(), std::string()); + for (const auto &tuple : tuples) + { + const auto index = stoi(fvField(tuple)); + const auto nextHop = fvValue(tuple); + + nhop_index_map[index] = nextHop; + SWSS_LOG_INFO("Storing next hop %s at index %d", nhop_index_map[index].c_str(), index); + } + + // Recover nexthop with index relationship + m_recoveryMap[key] = nhop_index_map; + + m_stateWarmRestartRouteTable.del(key); + } + + return Orch::bake(); +} /* calculateBankHashBucketStartIndices: generates the hash_bucket_indices for all banks * and stores it in fgNhgEntry for the group. @@ -191,7 +225,6 @@ void FgNhgOrch::setStateDbRouteEntry(const IpPrefix &ipPrefix, uint32_t index, N } - bool FgNhgOrch::writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid, const IpPrefix &ipPrefix, NextHopKey nextHop) { @@ -881,6 +914,8 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh SWSS_LOG_ENTER(); sai_status_t status; + bool isWarmReboot = false; + auto nexthopsMap = m_recoveryMap.find(ipPrefix.to_string()); for (uint32_t i = 0; i < fgNhgEntry->hash_bucket_indices.size(); i++) { uint32_t bank = i; @@ -913,11 +948,33 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh return false; } + // recover state before warm reboot + if (nexthopsMap != m_recoveryMap.end()) + { + isWarmReboot = true; + } + + SWSS_LOG_INFO("Warm reboot is set to %d", isWarmReboot); + for (uint32_t j = fgNhgEntry->hash_bucket_indices[i].start_index; j <= fgNhgEntry->hash_bucket_indices[i].end_index; j++) { - NextHopKey bank_nh_memb = bank_member_changes[bank].nhs_to_add[j % - bank_member_changes[bank].nhs_to_add.size()]; + NextHopKey bank_nh_memb; + if (isWarmReboot) + { + bank_nh_memb = nexthopsMap->second[j]; + SWSS_LOG_INFO("Recovering nexthop %s with bucket %d", bank_nh_memb.ip_address.to_string().c_str(), j); + // case nhps in bank are all down + if (fgNhgEntry->next_hops[bank_nh_memb.ip_address].bank != i) + { + syncd_fg_route_entry.inactive_to_active_map[i] = fgNhgEntry->next_hops[bank_nh_memb.ip_address].bank; + } + } + else + { + bank_nh_memb = bank_member_changes[bank].nhs_to_add[j % + bank_member_changes[bank].nhs_to_add.size()]; + } // Create a next hop group member sai_attribute_t nhgm_attr; @@ -961,6 +1018,11 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh } } + if (isWarmReboot) + { + m_recoveryMap.erase(nexthopsMap); + } + return true; } @@ -1592,7 +1654,7 @@ bool FgNhgOrch::doTaskFgNhgMember(const KeyOpFieldsValuesTuple & t) } fgNhg_entry->second.next_hops[next_hop] = fg_nh_info; SWSS_LOG_INFO("FG_NHG member added for group %s, next-hop %s", - fgNhg_entry->second.fg_nhg_name.c_str(), next_hop.to_string().c_str()); + fgNhg_entry->second.fg_nhg_name.c_str(), nhk.to_string().c_str()); } } else if (op == DEL_COMMAND) diff --git a/orchagent/fgnhgorch.h b/orchagent/fgnhgorch.h index a45faec4e0ca..c096f7b90ebd 100644 --- a/orchagent/fgnhgorch.h +++ b/orchagent/fgnhgorch.h @@ -83,11 +83,14 @@ typedef struct std::vector active_nhs; } BankMemberChanges; +typedef std::vector NextHopIndexMap; +typedef map WarmBootRecoveryMap; + class FgNhgOrch : public Orch, public Observer { public: FgNhgPrefixes fgNhgPrefixes; - FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch); + FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch); void update(SubjectType type, void *cntx); bool addRoute(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&); @@ -95,6 +98,9 @@ class FgNhgOrch : public Orch, public Observer bool validNextHopInNextHopGroup(const NextHopKey&); bool invalidNextHopInNextHopGroup(const NextHopKey&); + // warm reboot support + bool bake() override; + private: NeighOrch *m_neighOrch; IntfsOrch *m_intfsOrch; @@ -106,6 +112,10 @@ class FgNhgOrch : public Orch, public Observer FgPrefixOpCache m_fgPrefixAddCache; FgPrefixOpCache m_fgPrefixDelCache; + // warm reboot support for recovery + // < ip_prefix, < HashBuckets, nh_ip>> + WarmBootRecoveryMap m_recoveryMap; + bool setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, std::map &nhopgroup_members_set, const IpPrefix&); diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 4612dc1ff11d..a1f77854e8ab 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -123,10 +123,12 @@ bool OrchDaemon::init() gIntfsOrch = new IntfsOrch(m_applDb, APP_INTF_TABLE_NAME, vrf_orch); gNeighOrch = new NeighOrch(m_applDb, APP_NEIGH_TABLE_NAME, gIntfsOrch, gFdbOrch, gPortsOrch); - vector fgnhg_tables = { - CFG_FG_NHG, - CFG_FG_NHG_PREFIX, - CFG_FG_NHG_MEMBER + const int fgnhgorch_pri = 15; + + vector fgnhg_tables = { + { CFG_FG_NHG, fgnhgorch_pri }, + { CFG_FG_NHG_PREFIX, fgnhgorch_pri }, + { CFG_FG_NHG_MEMBER, fgnhgorch_pri } }; gFgNhgOrch = new FgNhgOrch(m_configDb, m_applDb, m_stateDb, fgnhg_tables, gNeighOrch, gIntfsOrch, vrf_orch); diff --git a/tests/mock_tests/aclorch_ut.cpp b/tests/mock_tests/aclorch_ut.cpp index c1b36337a06a..5f71471a576a 100644 --- a/tests/mock_tests/aclorch_ut.cpp +++ b/tests/mock_tests/aclorch_ut.cpp @@ -323,10 +323,12 @@ namespace aclorch_test gNeighOrch = new NeighOrch(m_app_db.get(), APP_NEIGH_TABLE_NAME, gIntfsOrch, gFdbOrch, gPortsOrch); ASSERT_EQ(gFgNhgOrch, nullptr); - vector fgnhg_tables = { - CFG_FG_NHG, - CFG_FG_NHG_PREFIX, - CFG_FG_NHG_MEMBER + const int fgnhgorch_pri = 15; + + vector fgnhg_tables = { + { CFG_FG_NHG, fgnhgorch_pri }, + { CFG_FG_NHG_PREFIX, fgnhgorch_pri }, + { CFG_FG_NHG_MEMBER, fgnhgorch_pri } }; gFgNhgOrch = new FgNhgOrch(m_config_db.get(), m_app_db.get(), m_state_db.get(), fgnhg_tables, gNeighOrch, gIntfsOrch, gVrfOrch); diff --git a/tests/test_fgnhg.py b/tests/test_fgnhg.py index 16ad325b2765..6b6af86385fd 100644 --- a/tests/test_fgnhg.py +++ b/tests/test_fgnhg.py @@ -192,6 +192,19 @@ def startup_link(dvs, db, port): dvs.servers[port].runcmd("ip link set up dev eth0") == 0 db.wait_for_field_match("PORT_TABLE", "Ethernet%d" % (port * 4), {"oper_status": "up"}) +def run_warm_reboot(dvs): + dvs.runcmd("config warm_restart enable swss") + + # Stop swss before modifing the configDB + dvs.stop_swss() + + # start to apply new port_config.ini + dvs.start_swss() + dvs.runcmd(['sh', '-c', 'supervisorctl start neighsyncd']) + dvs.runcmd(['sh', '-c', 'supervisorctl start restore_neighbors']) + + # Enabling some extra logging for validating the order of orchagent + dvs.runcmd("swssloglevel -l INFO -c orchagent") def verify_programmed_fg_state_db_entry(state_db,nh_memb_exp_count): memb_dict = nh_memb_exp_count @@ -237,6 +250,25 @@ def program_route_and_validate_fine_grained_ecmp(app_db, asic_db, state_db, ip_t validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) +def fgnhg_clean_up(config_db, asic_db, app_db, state_db, fg_nhg_name, fg_nhg_prefix, active_nhs): + # remove fgnhg prefix: The fine grained route should transition to regular ECMP/route + remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix) + + # Validate regular ECMP + validate_asic_nhg_regular_ecmp(asic_db, fg_nhg_prefix) + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, active_nhs) + state_db.wait_for_n_keys("FG_ROUTE_TABLE", 0) + + # clean up route entry + asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) + ps = swsscommon.ProducerStateTable(app_db.db_connection, ROUTE_TB) + ps._del(fg_nhg_prefix) + asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) + + # Cleanup all FG, arp and interface + remove_entry(config_db, "FG_NHG", fg_nhg_name) + class TestFineGrainedNextHopGroup(object): def test_route_fgnhg(self, dvs, testlog): app_db = dvs.get_app_db() @@ -503,23 +535,9 @@ def test_route_fgnhg(self, dvs, testlog): validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) - # remove fgnhg prefix: The fine grained route should transition to regular ECMP/route - remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix) - - # Validate regular ECMP - validate_asic_nhg_regular_ecmp(asic_db, fg_nhg_prefix) - asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 3) - state_db.wait_for_n_keys("FG_ROUTE_TABLE", 0) - - # remove prefix entry - asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) - ps._del(fg_nhg_prefix) - asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) - asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) - - # Cleanup all FG, arp and interface - remove_entry(config_db, "FG_NHG", fg_nhg_name) - + fgnhg_clean_up(config_db, asic_db, app_db, state_db, fg_nhg_name, fg_nhg_prefix, 3) + + # clean up nh entries for i in range(0,NUM_NHs): if_name_key = "Ethernet" + str(i*4) vlan_name_key = "Vlan" + str((i+1)*4) @@ -532,7 +550,6 @@ def test_route_fgnhg(self, dvs, testlog): dvs.servers[i].runcmd("ip link set down dev eth0") == 0 remove_entry(config_db, "FG_NHG_MEMBER", "10.0.0." + str(1 + i*2)) - ### Create new set of entries with a greater number of FG members and ### bigger bucket size such that the # of nhs are not divisible by ### bucket size. Different physical interface type for dynamicitiy. @@ -626,12 +643,126 @@ def test_route_fgnhg(self, dvs, testlog): "10.0.0.9":13,"10.0.0.11":64} program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + fgnhg_clean_up(config_db, asic_db, app_db, state_db, fg_nhg_name, fg_nhg_prefix, 6) + # clean up nh entries + for i in range(0,NUM_NHs): + if_name_key = "Ethernet" + str(i*4) + ip_pref_key = "Ethernet" + str(i*4) + "|10.0.0." + str(i*2) + "/31" + remove_entry(config_db, IF_TB, ip_pref_key) + remove_entry(config_db, IF_TB, vlan_name_key) + dvs.runcmd("config interface shutdown " + if_name_key) + dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + remove_entry(config_db, "FG_NHG_MEMBER", "10.0.0." + str(1 + i*2)) - # Remove route - # remove prefix entry - asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) - ps._del(fg_nhg_prefix) - asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) - asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) + def test_route_fgnhg_warm_reboot(self, dvs, testlog): + dvs.runcmd("swssloglevel -l INFO -c orchagent") + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + config_db = dvs.get_config_db() + state_db = dvs.get_state_db() + fvs_nul = {"NULL": "NULL"} + NUM_NHs = 6 + fg_nhg_name = "fgnhg_v4" + fg_nhg_prefix = "2.2.2.0/24" + bucket_size = 60 + ip_to_if_map = {} - remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix) + fvs = {"bucket_size": str(bucket_size)} + create_entry(config_db, FG_NHG, fg_nhg_name, fvs) + + fvs = {"FG_NHG": fg_nhg_name} + create_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix, fvs) + + for i in range(0,NUM_NHs): + if_name_key = "Ethernet" + str(i*4) + vlan_name_key = "Vlan" + str((i+1)*4) + ip_pref_key = vlan_name_key + "|10.0.0." + str(i*2) + "/31" + fvs = {"vlanid": str((i+1)*4)} + create_entry(config_db, VLAN_TB, vlan_name_key, fvs) + fvs = {"tagging_mode": "untagged"} + create_entry(config_db, VLAN_MEMB_TB, vlan_name_key + "|" + if_name_key, fvs) + create_entry(config_db, VLAN_IF_TB, vlan_name_key, fvs_nul) + create_entry(config_db, VLAN_IF_TB, ip_pref_key, fvs_nul) + dvs.runcmd("config interface startup " + if_name_key) + dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + bank = 0 + if i >= NUM_NHs/2: + bank = 1 + fvs = {"FG_NHG": fg_nhg_name, "bank": str(bank), "link": if_name_key} + create_entry(config_db, FG_NHG_MEMBER, "10.0.0." + str(1 + i*2), fvs) + ip_to_if_map["10.0.0." + str(1 + i*2)] = vlan_name_key + + # Wait for the software to receive the entries + time.sleep(1) + + ps = swsscommon.ProducerStateTable(app_db.db_connection, ROUTE_TB) + fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.7,10.0.0.9,10.0.0.11"), + ("ifname", "Vlan16,Vlan20,Vlan24")]) + ps.set(fg_nhg_prefix, fvs) + # No ASIC_DB entry we can wait for since ARP is not resolved yet, + # We just use sleep so that the sw receives this entry + time.sleep(1) + + asic_nh_count = len(asic_db.get_keys(ASIC_NH_TB)) + dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01") + dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02") + dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03") + dvs.runcmd("arp -s 10.0.0.7 00:00:00:00:00:04") + dvs.runcmd("arp -s 10.0.0.9 00:00:00:00:00:05") + dvs.runcmd("arp -s 10.0.0.11 00:00:00:00:00:06") + asic_db.wait_for_n_keys(ASIC_NH_TB, asic_nh_count + 6) + + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + # Now that ARP was resolved, 10.0.0.7 should be added as a valid fg nhg member + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + + run_warm_reboot(dvs) + + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring down 1 next hop in bank 1 + nh_memb_exp_count = {"10.0.0.7":30,"10.0.0.11":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring down 2 next hop and bring up 1 next hop in bank 1 + nh_memb_exp_count = {"10.0.0.9":60} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring up 1 next hop in bank 1 + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring up some next-hops in bank 0 for the 1st time + nh_memb_exp_count = {"10.0.0.1":10,"10.0.0.3":10,"10.0.0.5":10,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + run_warm_reboot(dvs) + + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size) + nhgid = validate_asic_nhg_fine_grained_ecmp(asic_db, fg_nhg_prefix, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + nh_memb_exp_count = {"10.0.0.1":10,"10.0.0.3":10,"10.0.0.5":10,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) \ No newline at end of file