Skip to content

Commit

Permalink
Support neighsyncd system warmreboot. (#661)
Browse files Browse the repository at this point in the history
* Support neighsyncd system warmreboot.

neighsyncd will wait for the kernel restore process to be done
before reconciliation

Add vs testcases to cover the kernel neighbor table restore process
and the neighsyncd process upon system warm reboot

Signed-off-by: Zhenggen Xu <zxu@linkedin.com>

* Add the neigh_restore table to swss-schema.md
Make the state check function more accurate.

Signed-off-by: Zhenggen Xu <zxu@linkedin.com>

* Add restore_neighbors.py to be part of swss deb pkg:

In case system warm reboot is enabled, it will try to restore the neighbor
table from appDB into kernel through netlink API calls and update the neighbor
table by sending arp/ns requests to all neighbor entries, then it sets the
stateDB flag for neighsyncd to continue the reconciliation process.

Added timeout in neighsyncd when waiting for restore_neighbors to finish
Updated vs testcases

Signed-off-by: Zhenggen Xu <zxu@linkedin.com>

* Use chrono::steady_clock in neighsyncd for time check
Use monotonic lib for python time check

Update the warmrestart python binding lib and
re-enabled restore cnt check in vs tests

Signed-off-by: Zhenggen Xu <zxu@linkedin.com>

* Use table hget to simplify the code
Time-out value changes
vs test case changes to support default host side neigh table settings.

Signed-off-by: Zhenggen Xu <zxu@linkedin.com>

* Fix vs test cases after merge

Signed-off-by: Zhenggen Xu <zxu@linkedin.com>
  • Loading branch information
zhenggen-xu authored and lguohan committed Nov 12, 2018
1 parent f380685 commit afdcf34
Show file tree
Hide file tree
Showing 8 changed files with 677 additions and 58 deletions.
1 change: 1 addition & 0 deletions debian/swss.install
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ swssconfig/sample/th.64ports.buffers.json etc/swss/config.d
swssconfig/sample/th.64ports.qos.json etc/swss/config.d
swssconfig/sample/th2.118ports.buffers.json etc/swss/config.d
swssconfig/sample/th2.118ports.qos.json etc/swss/config.d
neighsyncd/restore_neighbors.py usr/bin
5 changes: 5 additions & 0 deletions doc/swss-schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,11 @@ Stores information for physical switch ports managed by the switch chip. Ports t
; dynamic data like port state, neighbor, routes
; and so on.

### NEIGH_RESTORE_TABLE
;State for neighbor table restoring process during warm reboot
key = NEIGH_RESTORE_TABLE|Flags
restored = "true" / "false" ; restored state

## Configuration files
What configuration files should we have? Do apps, orch agent each need separate files?

Expand Down
17 changes: 16 additions & 1 deletion neighsyncd/neighsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,27 @@
using namespace std;
using namespace swss;

NeighSync::NeighSync(RedisPipeline *pipelineAppDB) :
NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb) :
m_neighTable(pipelineAppDB, APP_NEIGH_TABLE_NAME),
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
m_AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", &m_neighTable, DEFAULT_NEIGHSYNC_WARMSTART_TIMER)
{
}

// Check if neighbor table is restored in kernel
bool NeighSync::isNeighRestoreDone()
{
string value;

m_stateNeighRestoreTable.hget("Flags", "restored", value);
if (value == "true")
{
SWSS_LOG_NOTICE("neighbor table restore to kernel is done");
return true;
}
return false;
}

void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
{
char ipStr[MAX_ADDR_SIZE + 1] = {0};
Expand Down
10 changes: 9 additions & 1 deletion neighsyncd/neighsync.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,31 @@

#define DEFAULT_NEIGHSYNC_WARMSTART_TIMER 5

//This is the time (in seconds) that neighsyncd waits for the restore_neighbors
//service to finish; it should be longer than the restore_neighbors timeout value (60).
//Timing out should not happen; if it does, the system is in an unknown state and we should exit.
#define RESTORE_NEIGH_WAIT_TIME_OUT 70

namespace swss {

class NeighSync : public NetMsg
{
public:
enum { MAX_ADDR_SIZE = 64 };

NeighSync(RedisPipeline *pipelineAppDB);
NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb);

virtual void onMsg(int nlmsg_type, struct nl_object *obj);

bool isNeighRestoreDone();

AppRestartAssist *getRestartAssist()
{
return &m_AppRestartAssist;
}

private:
Table m_stateNeighRestoreTable;
ProducerStateTable m_neighTable;
AppRestartAssist m_AppRestartAssist;
};
Expand Down
34 changes: 29 additions & 5 deletions neighsyncd/neighsyncd.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <chrono>
#include "logger.h"
#include "select.h"
#include "netdispatcher.h"
Expand All @@ -14,8 +17,9 @@ int main(int argc, char **argv)

DBConnector appDb(APPL_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);
RedisPipeline pipelineAppDB(&appDb);
DBConnector stateDb(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);

NeighSync sync(&pipelineAppDB);
NeighSync sync(&pipelineAppDB, &stateDb);

NetDispatcher::getInstance().registerMessageHandler(RTM_NEWNEIGH, &sync);
NetDispatcher::getInstance().registerMessageHandler(RTM_DELNEIGH, &sync);
Expand All @@ -27,16 +31,36 @@ int main(int argc, char **argv)
NetLink netlink;
Select s;

netlink.registerGroup(RTNLGRP_NEIGH);
cout << "Listens to neigh messages..." << endl;
netlink.dumpRequest(RTM_GETNEIGH);
using namespace std::chrono;

s.addSelectable(&netlink);
if (sync.getRestartAssist()->isWarmStartInProgress())
{
sync.getRestartAssist()->readTableToMap();

steady_clock::time_point starttime = steady_clock::now();
while (!sync.isNeighRestoreDone())
{
duration<double> time_span =
duration_cast<duration<double>>(steady_clock::now() - starttime);
int pasttime = int(time_span.count());
SWSS_LOG_INFO("waited neighbor table to be restored to kernel"
" for %d seconds", pasttime);
if (pasttime > RESTORE_NEIGH_WAIT_TIME_OUT)
{
SWSS_LOG_ERROR("neighbor table restore is not finished"
" after timed-out, exit!!!");
exit(EXIT_FAILURE);
}
sleep(1);
}
sync.getRestartAssist()->startReconcileTimer(s);
}

netlink.registerGroup(RTNLGRP_NEIGH);
cout << "Listens to neigh messages..." << endl;
netlink.dumpRequest(RTM_GETNEIGH);

s.addSelectable(&netlink);
while (true)
{
Selectable *temps;
Expand Down
245 changes: 245 additions & 0 deletions neighsyncd/restore_neighbors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
#!/usr/bin/env python

"""
Description: restore_neighbors.py -- restoring neighbor table into kernel during system warm reboot.
The script is started by supervisord in swss docker when the docker is started.
It does not do anything in case warm restart is not enabled.
In case system warm reboot is enabled, it will try to restore the neighbor table into kernel
through netlink API calls and update the neigh table by sending arp/ns requests to all neighbor
entries, then it sets the stateDB flag for neighsyncd to continue the reconciliation process.
In case only docker restart is enabled, it just sets the stateDB flag so neighsyncd can follow
the same logic.
"""

import sys
import swsssdk
import netifaces
import time
import monotonic
from pyroute2 import IPRoute, NetlinkError
from pyroute2.netlink.rtnl import ndmsg
from socket import AF_INET,AF_INET6
import logging
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
from scapy.all import conf, in6_getnsma, inet_pton, inet_ntop, in6_getnsmac, get_if_hwaddr, Ether, ARP, IPv6, ICMPv6ND_NS, ICMPv6NDOptSrcLLAddr
from swsscommon import swsscommon
import errno

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
logger.addHandler(logging.NullHandler())

# timeout the restore process in 1 min if not finished
# This is mostly to wait for interfaces to be created and up after warm-reboot
# It would be good to keep that below routing reconciliation time-out.
TIME_OUT = 60

# every 5 seconds to check interfaces state
CHECK_INTERVAL = 5

ip_family = {"IPv4": AF_INET, "IPv6": AF_INET6}

# return the first ipv4/ipv6 address assigned on intf
def first_ip_on_intf(intf, family):
if intf in netifaces.interfaces():
ipaddresses = netifaces.ifaddresses(intf)
if ip_family[family] in ipaddresses:
# cover link local address as well
return ipaddresses[ip_family[family]][0]['addr'].split("%")[0]
return None

# check if the intf is operational up
def is_intf_oper_state_up(intf):
oper_file = '/sys/class/net/{0}/carrier'
try:
state_file = open(oper_file.format(intf), 'r')
state = state_file.readline().rstrip()
except Exception as e:
logger.info('Error: {}'.format(str(e)))
return False
if state == '1':
return True
return False

# read the neigh table from AppDB to memory, format as below
# build map as below, this can efficiently access intf and family groups later
# { intf1 -> { { family1 -> [[ip1, mac1], [ip2, mac2] ...] }
# { family2 -> [[ipM, macM], [ipN, macN] ...] } },
# ...
# intfA -> { { family1 -> [[ipW, macW], [ipX, macX] ...] }
# { family2 -> [[ipY, macY], [ipZ, macZ] ...] } }
# }
#
# Alternatively:
# 1, we can build:
# { intf1 -> [[family1, ip1, mac1], [family2, ip2, mac2] ...]},
# ...
# { intfA -> [[family1, ipX, macX], [family2, ipY, macY] ...]}
#
# 2, Or simply build two maps based on families
# These alternative solutions would have worse performance because:
# 1, need iterate the whole list if only one family is up.
# 2, need check interface state twice due to the split map

def read_neigh_table_to_maps():
db = swsssdk.SonicV2Connector(host='127.0.0.1')
db.connect(db.APPL_DB, False)

intf_neigh_map = {}

keys = db.keys(db.APPL_DB, 'NEIGH_TABLE:*')
keys = [] if keys is None else keys
for key in keys:
key_split = key.split(':', 2)
intf_name = key_split[1]
if intf_name == 'lo':
continue
dst_ip = key_split[2]
value = db.get_all(db.APPL_DB, key)
if 'neigh' in value and 'family' in value:
dmac = value['neigh']
family = value['family']
else:
raise RuntimeError('Neigh table format is incorrect')

if family not in ip_family:
raise RuntimeError('Neigh table format is incorrect')

ip_mac_pair = []
ip_mac_pair.append(dst_ip)
ip_mac_pair.append(dmac)

intf_neigh_map.setdefault(intf_name, {}).setdefault(family, []).append(ip_mac_pair)
db.close(db.APPL_DB)
return intf_neigh_map


# Use netlink to set neigh table into kernel, not overwrite the existing ones
def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac):
logging.info('Add neighbor entries: family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
family, intf_idx, dst_ip, dmac))

if family not in ip_family:
return

family_af_inet = ip_family[family]
try :
ipclass.neigh('add',
family=family_af_inet,
dst=dst_ip,
lladdr=dmac,
ifindex=intf_idx,
state=ndmsg.states['reachable'])
# If neigh exists, log it but no exception raise, other exceptions, raise
except NetlinkError as e:
if e[0] == errno.EEXIST:
logger.warning('Neigh exists in kernel with family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
family, intf_idx, dst_ip, dmac))
else:
raise

# build ARP or NS packets depending on family
def build_arp_ns_pkt(family, smac, src_ip, dst_ip):
if family == 'IPv4':
eth = Ether(src=smac, dst='ff:ff:ff:ff:ff:ff')
pkt = eth/ARP(op=ARP.who_has, pdst=dst_ip)
elif family == 'IPv6':
nsma = in6_getnsma(inet_pton(AF_INET6, dst_ip))
mcast_dst_ip = inet_ntop(AF_INET6, nsma)
dmac = in6_getnsmac(nsma)
eth = Ether(src=smac,dst=dmac)
ipv6 = IPv6(src=src_ip, dst=mcast_dst_ip)
ns = ICMPv6ND_NS(tgt=dst_ip)
ns_opt = ICMPv6NDOptSrcLLAddr(lladdr=smac)
pkt = eth/ipv6/ns/ns_opt
return pkt

# Set the statedb "NEIGH_RESTORE_TABLE|Flags", so neighsyncd can start reconciliation
def set_statedb_neigh_restore_done():
db = swsssdk.SonicV2Connector(host='127.0.0.1')
db.connect(db.STATE_DB, False)
db.set(db.STATE_DB, 'NEIGH_RESTORE_TABLE|Flags', 'restored', 'true')
db.close(db.STATE_DB)
return

def restore_update_kernel_neighbors(intf_neigh_map):
# create object for netlink calls to kernel
ipclass = IPRoute()
mtime = monotonic.time.time
start_time = mtime()
while (mtime() - start_time) < TIME_OUT:
for intf, family_neigh_map in intf_neigh_map.items():
# only try to restore to kernel when link is up
if is_intf_oper_state_up(intf):
src_mac = get_if_hwaddr(intf)
intf_idx = ipclass.link_lookup(ifname=intf)[0]
# create socket per intf to send packets
s = conf.L2socket(iface=intf)

# Only two families: 'IPv4' and 'IPv6'
for family in ip_family.keys():
# if ip address assigned and if we have neighs in this family, restore them
src_ip = first_ip_on_intf(intf, family)
if src_ip and (family in family_neigh_map):
neigh_list = family_neigh_map[family]
for dst_ip, dmac in neigh_list:
# use netlink to set neighbor entries
set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac)

# best effort to update kernel neigh info
# this will be updated by arp_update later too
s.send(build_arp_ns_pkt(family, src_mac, src_ip, dst_ip))
# delete this family on the intf
del intf_neigh_map[intf][family]
# close the pkt socket
s.close()

# if all families are deleted, remove the key
if len(intf_neigh_map[intf]) == 0:
del intf_neigh_map[intf]
# map is empty, all neigh entries are restored
if not intf_neigh_map:
break
time.sleep(CHECK_INTERVAL)


def main():

print "restore_neighbors service is started"

# Use warmstart python binding
warmstart = swsscommon.WarmStart()
warmstart.initialize("neighsyncd", "swss")
warmstart.checkWarmStart("neighsyncd", "swss", False)

# if swss or system warm reboot not enabled, don't run
if not warmstart.isWarmStart():
print "restore_neighbors service is skipped as warm restart not enabled"
return

# swss restart not system warm reboot
if not warmstart.isSystemWarmRebootEnabled():
set_statedb_neigh_restore_done()
print "restore_neighbors service is done as system warm reboot not enabled"
return

# read the neigh table from appDB to internal map
try:
intf_neigh_map = read_neigh_table_to_maps()
except RuntimeError as e:
logger.exception(str(e))
sys.exit(1)

try:
restore_update_kernel_neighbors(intf_neigh_map)
except Exception as e:
logger.exception(str(e))
sys.exit(1)

# set statedb to signal other processes like neighsyncd
set_statedb_neigh_restore_done()
print "restore_neighbor service is done for system warmreboot"
return

if __name__ == '__main__':
main()
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ def runcmd(self, cmd):
def runcmd_async(self, cmd):
return subprocess.Popen("ip netns exec %s %s" % (self.nsname, cmd), shell=True)

def runcmd_output(self, cmd):
return subprocess.check_output("ip netns exec %s %s" % (self.nsname, cmd), shell=True)

class DockerVirtualSwitch(object):
def __init__(self, name=None, keeptb=False):
self.basicd = ['redis-server',
Expand Down
Loading

0 comments on commit afdcf34

Please sign in to comment.