Skip to content

Commit

Permalink
[dualtor] Fix neighbor miss when mux is not ready (#2696)
Browse files Browse the repository at this point in the history
What I did
The issue is that MuxOrch::m_syncdNeighbors assumes all cached zero-mac neighbors have tunnel routes installed. The check before adding tunnel routes ignores the following zero-mac neighbor events.
Let's ensure that, if a zero-mac neighbor is present in MuxOrch::m_syncdNeighbors, it has a tunnel route installed.
So let's check the tunnel route install success before adding a neighbor to MuxOrch::m_syncdNeighbors.

Why I did it
To fix #2675
If MuxOrch is not fully initialized, and there is a FAILED neighbor added to kernel, the tunnel route creation will fail. But the subsequent FAILED neighbor events cannot trigger tunnel route creation because MuxOrch::m_syncdNeighbors caches the first event and regard the tunnel as already installed.

How I verified it
UT and verify on testbed.

Signed-off-by: Longxiang Lyu <lolv@microsoft.com>
  • Loading branch information
lolyu authored Mar 18, 2023
1 parent b085b5f commit c949517
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 14 deletions.
11 changes: 8 additions & 3 deletions orchagent/muxorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
return;
}

auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
bool is_tunnel_route_installed = isStandaloneTunnelRouteInstalled(update.entry.ip_address);
// Handling zero MAC neighbor updates
if (!update.mac)
{
Expand All @@ -1098,7 +1098,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)

if (update.add)
{
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
if (!is_tunnel_route_installed)
{
createStandaloneTunnelRoute(update.entry.ip_address);
}
Expand All @@ -1113,7 +1113,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
* make sure to remove any existing tunnel routes to prevent conflicts.
* This block also covers the case of neighbor deletion.
*/
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
if (is_tunnel_route_installed)
{
removeStandaloneTunnelRoute(update.entry.ip_address);
}
Expand Down Expand Up @@ -1446,6 +1446,11 @@ void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
standalone_tunnel_neighbors_.erase(neighborIp);
}

bool MuxOrch::isStandaloneTunnelRouteInstalled(const IpAddress& neighborIp)
{
return standalone_tunnel_neighbors_.find(neighborIp) != standalone_tunnel_neighbors_.end();
}

MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
Orch2(db, tableName, request_),
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),
Expand Down
2 changes: 2 additions & 0 deletions orchagent/muxorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ class MuxOrch : public Orch2, public Observer, public Subject
bool removeNextHopTunnel(std::string tunnelKey, IpAddress& ipAddr);
sai_object_id_t getNextHopTunnelId(std::string tunnelKey, IpAddress& ipAddr);

bool isStandaloneTunnelRouteInstalled(const IpAddress& neighborIp);

private:
virtual bool addOperation(const Request& request);
virtual bool delOperation(const Request& request);
Expand Down
42 changes: 32 additions & 10 deletions orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -731,17 +731,33 @@ void NeighOrch::doTask(Consumer &consumer)
mac_address = MacAddress(fvValue(*i));
}

if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
bool nbr_not_found = (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end());
if (nbr_not_found || m_syncdNeighbors[neighbor_entry].mac != mac_address)
{
// only for unresolvable neighbors that are new
if (!mac_address)
if (!mac_address)
{
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
if (nbr_not_found)
{
addZeroMacTunnelRoute(neighbor_entry, mac_address);
// only for unresolvable neighbors that are new
if (addZeroMacTunnelRoute(neighbor_entry, mac_address))
{
it = consumer.m_toSync.erase(it);
}
else
{
it++;
continue;
}
}
else
{
/*
* For neighbors that were previously resolvable but are now unresolvable,
* we expect such neighbor entries to be deleted prior to a zero MAC update
* arriving for that same neighbor.
*/
it = consumer.m_toSync.erase(it);
}
it = consumer.m_toSync.erase(it);
}
else if (addNeighbor(neighbor_entry, mac_address))
{
Expand Down Expand Up @@ -1744,12 +1760,18 @@ void NeighOrch::updateSrv6Nexthop(const NextHopKey &nh, const sai_object_id_t &n
m_syncdNextHops.erase(nh);
}
}
void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)

bool NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
{
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
NeighborUpdate update = {entry, mac, true};
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
m_syncdNeighbors[entry] = { mac, false };
}
if (mux_orch->isStandaloneTunnelRouteInstalled(entry.ip_address))
{
m_syncdNeighbors[entry] = { mac, false };
return true;
}

return false;
}
2 changes: 1 addition & 1 deletion orchagent/neighorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class NeighOrch : public Orch, public Subject, public Observer
bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
void clearResolvedNeighborEntry(const NeighborEntry &);

void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
bool addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
};

#endif /* SWSS_NEIGHORCH_H */
31 changes: 31 additions & 0 deletions tests/test_mux.py
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,37 @@ def test_neighbor_miss(
expected_mac=mac if exp_result[REAL_MAC] else '00:00:00:00:00:00'
)

def test_neighbor_miss_no_mux(
self, dvs, dvs_route, setup_vlan, setup_tunnel, setup,
setup_peer_switch, neighbor_cleanup, testlog
):
config_db = dvs.get_config_db()
appdb = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)

test_ip = self.SERV1_SOC_IPV4
self.ping_ip(dvs, test_ip)

# no mux present, no standalone tunnel route installed
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=False)

# setup the mux
config_db = dvs.get_config_db()
self.create_mux_cable(config_db)
# tunnel route should be installed immediately after mux setup
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=True)

# set port state as standby
self.set_mux_state(appdb, "Ethernet0", "standby")
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=True)

# set port state as active
self.set_mux_state(appdb, "Ethernet0", "active")
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=True)

# clear the FAILED neighbor
self.clear_neighbors(dvs)
self.check_neighbor_state(dvs, dvs_route, test_ip, expect_route=False)

def test_neighbor_miss_no_peer(
self, dvs, dvs_route, setup_vlan, setup_mux_cable, setup_tunnel,
remove_peer_switch, neighbor_cleanup, testlog
Expand Down

0 comments on commit c949517

Please sign in to comment.