Skip to content

Commit

Permalink
zebra: separate nht notifications from dplane_result thread
Browse files Browse the repository at this point in the history
There is a CPU issue in ZEBRA when BGP installs and removes
a lot of routes at the same time. The vtysh and shell become
unreachable. This is the case of BGP failover scenarios with
two peers, and one of the peers becoming unreachable.

For each route change, nexthop tracking is invoked to check the
impact of the route becoming available or unavailable.
Two observations were made:

- When a specific route changes and a less specific route exists
(or a default route, as in this setup), nexthop tracking is
invoked for that less specific route. There is no need to invoke
nexthop tracking repeatedly for the same default prefix, given that
the dplane_result thread handles routes in bulk.

- The first picture from the below link indicates nexthop
tracking consumes time, and maintaining this activity in
the zebra main thread will still result in STARVATION messages.

Propose to separate the nht notifications from the dplane_result
thread by creating a queue list that stores the prefixes
to evaluate against nexthop tracking. Before a prefix is enqueued,
the list is checked so that a prefix already queued is not added
again. The processing is done in a separate
'rib_process_nht_thread_loop' function call.

Link: #16028

Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
  • Loading branch information
pguibert6WIND committed May 31, 2024
1 parent 1e64cc6 commit 3c117b3
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 8 deletions.
4 changes: 3 additions & 1 deletion zebra/rib.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ extern struct route_table *rib_tables_iter_next(rib_tables_iter_t *iter);
extern uint8_t route_distance(int type);

extern void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
bool rt_delete);
bool rt_delete, bool enqueue_to_list);
extern void rib_process_nht_thread_loop(struct event *event);

/*
* rib_find_rn_from_ctx
Expand Down Expand Up @@ -628,6 +629,7 @@ extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
uint8_t instance);

extern void zebra_vty_init(void);
extern void zebra_rnh_job_list_display(struct vty *vty);

extern pid_t pid;

Expand Down
112 changes: 105 additions & 7 deletions zebra/zebra_rib.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ DEFINE_MTYPE(ZEBRA, RE, "Route Entry");
DEFINE_MTYPE_STATIC(ZEBRA, RIB_DEST, "RIB destination");
DEFINE_MTYPE_STATIC(ZEBRA, RIB_UPDATE_CTX, "Rib update context object");
DEFINE_MTYPE_STATIC(ZEBRA, WQ_WRAPPER, "WQ wrapper");
DEFINE_MTYPE_STATIC(ZEBRA, RNH_JOB_CTX, "Rnh Job context");

/*
* Event, list, and mutex for delivery of dataplane results
Expand Down Expand Up @@ -821,11 +822,80 @@ static int rib_can_delete_dest(rib_dest_t *dest)
return 1;
}

/*
 * Pending nexthop-tracking (NHT) evaluation jobs.
 *
 * zebra_rib_evaluate_rn_nexthops() appends one job per distinct
 * (vrf, safi, prefix) when asked to enqueue, and
 * rib_process_nht_thread_loop() drains the list, calling
 * zebra_evaluate_rnh() for each job.
 */
PREDECL_DLIST(zebra_rnh_job_list);

/* Only this file uses the queue, so give it internal linkage. */
static struct zebra_rnh_job_list_head zebra_rnh_list;

/* One deferred zebra_evaluate_rnh() call. */
struct zebra_rnh_job_ctx {
	/* VRF of the job; resolved again when the job is run */
	vrf_id_t vrf_id;
	/* Copy of the prefix to evaluate */
	struct prefix prefix;
	/* SAFI taken from the rnh that triggered the job */
	safi_t safi;
	/* Embedded list linkage */
	struct zebra_rnh_job_list_item rnh_entries;
};
DECLARE_DLIST(zebra_rnh_job_list, struct zebra_rnh_job_ctx, rnh_entries);

/* Counters reported by zebra_rnh_job_list_display() */
static uint32_t zebra_rnh_job_list_num;	      /* enqueue requests seen */
static uint32_t zebra_rnh_job_list_dup;	      /* requests already queued */
static uint32_t zebra_rnh_job_list_processed; /* jobs evaluated */
static uint32_t zebra_rnh_job_list_max_batch; /* most jobs in one run */

/* Timer event that runs rib_process_nht_thread_loop() */
static struct event *t_zebra_rnh_job_list;

/*
 * Print the nexthop-tracking job counters as one line on the given vty:
 * enqueue requests, duplicates, jobs processed and the largest number of
 * jobs handled in a single run.
 */
void zebra_rnh_job_list_display(struct vty *vty)
{
	const uint32_t requested = zebra_rnh_job_list_num;
	const uint32_t duplicates = zebra_rnh_job_list_dup;
	const uint32_t processed = zebra_rnh_job_list_processed;
	const uint32_t max_batch = zebra_rnh_job_list_max_batch;

	vty_out(vty,
		"RIB route evaluation count %u, dup %u, processed %u, max per batch %u\n",
		requested, duplicates, processed, max_batch);
}

void rib_process_nht_thread_loop(struct event *event)
{
struct zebra_rnh_job_list_head ctxlist;
struct zebra_rnh_job_ctx *ctx;
struct zebra_vrf *zvrf;
uint32_t count = 0;

do {
zebra_rnh_job_list_init(&ctxlist);

/* Dequeue list of context structs */
while ((ctx = zebra_rnh_job_list_pop(&zebra_rnh_list)) != NULL)
zebra_rnh_job_list_add_tail(&ctxlist, ctx);

/* Dequeue context block */
ctx = zebra_rnh_job_list_pop(&ctxlist);
/* If we've emptied the results queue, we're done */
if (ctx == NULL)
break;
while (ctx) {
zvrf = zebra_vrf_lookup_by_id(ctx->vrf_id);
if (zvrf) {
zebra_rnh_job_list_processed++;
count++;
zebra_evaluate_rnh(zvrf,
family2afi(ctx->prefix.family),
0, &ctx->prefix, ctx->safi);
}
XFREE(MTYPE_RNH_JOB_CTX, ctx);
ctx = zebra_rnh_job_list_pop(&ctxlist);
}
} while (1);

if (count > zebra_rnh_job_list_max_batch)
zebra_rnh_job_list_max_batch = count;
}

/* Delay, in milliseconds, before queued nexthop-tracking jobs are run */
#define ZEBRA_RNH_JOB_DELAY_MSEC 5

/*
 * Arm the timer on zrouter.master that runs rib_process_nht_thread_loop()
 * after ZEBRA_RNH_JOB_DELAY_MSEC milliseconds. The event handle is kept in
 * t_zebra_rnh_job_list.
 */
static void rib_process_nht(void)
{
	event_add_timer_msec(zrouter.master, rib_process_nht_thread_loop, NULL,
			     ZEBRA_RNH_JOB_DELAY_MSEC, &t_zebra_rnh_job_list);
}

void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
bool rt_delete)
bool rt_delete, bool enqueue_to_list)
{
rib_dest_t *dest = rib_dest_from_rnode(rn);
struct rnh *rnh;
struct zebra_rnh_job_ctx *ctx;
bool found;

/*
* We are storing the rnh's associated with
Expand Down Expand Up @@ -892,8 +962,31 @@ void zebra_rib_evaluate_rn_nexthops(struct route_node *rn, uint32_t seq,
}

rnh->seqno = seq;
zebra_evaluate_rnh(zvrf, family2afi(p->family), 0, p,
rnh->safi);
if (enqueue_to_list) {
zebra_rnh_job_list_num++;
found = false;
frr_each_safe (zebra_rnh_job_list,
&zebra_rnh_list, ctx) {
if (rnh->safi == ctx->safi &&
zvrf->vrf->vrf_id == ctx->vrf_id &&
prefix_same(&ctx->prefix, p)) {
found = true;
zebra_rnh_job_list_dup++;
break;
}
}
if (!found) {
ctx = XCALLOC(MTYPE_RNH_JOB_CTX,
sizeof(struct zebra_rnh_job_ctx));
ctx->vrf_id = zvrf->vrf->vrf_id;
ctx->safi = rnh->safi;
prefix_copy(&ctx->prefix, p);
zebra_rnh_job_list_add_tail(&zebra_rnh_list,
ctx);
}
} else
zebra_evaluate_rnh(zvrf, family2afi(p->family),
0, p, rnh->safi);
}

rn = rn->parent;
Expand Down Expand Up @@ -929,7 +1022,7 @@ int rib_gc_dest(struct route_node *rn)
}

zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
true);
true, false);

dest->rnode = NULL;
rnh_list_fini(&dest->nht);
Expand Down Expand Up @@ -2000,7 +2093,7 @@ static void zebra_rib_evaluate_prefix_nhg(struct hash_bucket *b, void *data)
redistribute_update(rn, re, re);
zebra_rib_evaluate_rn_nexthops(rn,
zebra_router_get_next_sequence(),
false);
false, false);
zebra_rib_evaluate_mpls(rn);
}
}
Expand Down Expand Up @@ -2331,7 +2424,7 @@ static void rib_process_result(struct zebra_dplane_ctx *ctx)
zebra_rib_fixup_system(rn);
}

zebra_rib_evaluate_rn_nexthops(rn, seq, rt_delete);
zebra_rib_evaluate_rn_nexthops(rn, seq, rt_delete, true);
zebra_rib_evaluate_mpls(rn);
done:

Expand Down Expand Up @@ -2591,7 +2684,7 @@ static void rib_process_dplane_notify(struct zebra_dplane_ctx *ctx)

/* Make any changes visible for lsp and nexthop-tracking processing */
zebra_rib_evaluate_rn_nexthops(rn, zebra_router_get_next_sequence(),
false);
false, false);

zebra_rib_evaluate_mpls(rn);

Expand Down Expand Up @@ -5104,6 +5197,8 @@ static void rib_process_dplane_results(struct event *thread)

} while (1);

rib_process_nht();

#ifdef HAVE_SCRIPTING
if (fs)
frrscript_delete(fs);
Expand Down Expand Up @@ -5156,6 +5251,8 @@ void zebra_rib_init(void)
{
check_route_info();

zebra_rnh_job_list_init(&zebra_rnh_list);

rib_queue_init();

/* Init dataplane, and register for results */
Expand All @@ -5169,6 +5266,7 @@ void zebra_rib_terminate(void)
struct zebra_dplane_ctx *ctx;

EVENT_OFF(t_dplane);
EVENT_OFF(t_zebra_rnh_job_list);

ctx = dplane_ctx_dequeue(&rib_dplane_q);
while (ctx) {
Expand Down
10 changes: 10 additions & 0 deletions zebra/zebra_vty.c
Original file line number Diff line number Diff line change
Expand Up @@ -4086,6 +4086,15 @@ DEFUN (show_dataplane,
return dplane_show_helper(vty, detailed);
}

/*
 * "show rib info": display the RIB nexthop-tracking job counters
 * printed by zebra_rnh_job_list_display().
 */
DEFUN(show_rib_info, show_rib_info_cmd, "show rib info",
SHOW_STR "RIB information\n"
"RIB information\n")
{
zebra_rnh_job_list_display(vty);
return CMD_SUCCESS;
}

/* Display dataplane providers info */
DEFUN (show_dataplane_providers,
show_dataplane_providers_cmd,
Expand Down Expand Up @@ -4463,6 +4472,7 @@ void zebra_vty_init(void)
install_element(CONFIG_NODE, &zebra_dplane_queue_limit_cmd);
install_element(CONFIG_NODE, &no_zebra_dplane_queue_limit_cmd);

install_element(VIEW_NODE, &show_rib_info_cmd);
#ifdef HAVE_NETLINK
install_element(CONFIG_NODE, &zebra_kernel_netlink_batch_tx_buf_cmd);
install_element(CONFIG_NODE, &no_zebra_kernel_netlink_batch_tx_buf_cmd);
Expand Down

0 comments on commit 3c117b3

Please sign in to comment.