From 050093c2cf208c63b14f2a931fe0de022edd9af0 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Mon, 30 Sep 2024 15:09:42 -0400 Subject: [PATCH] bgpd: Cleanup multipath figuring out in bgp Currently bgp multipath has these properties: a) mp_info may or may not be on a single path, based upon path perturbations in the past. b) mp_info->count started counting at 0( meaning 1 ). As that the bestpath path_info was never included in the count c) The first mp_info in the list held the multipath data associated with the multipath. As such if you were at any other node that data was not filled in. d) As such the mp_info's that are not first on the list basically were just pointers to the corresponding bgp_path_info that was in the multipath. e) On bestpath calculation, a linklist(struct linklist *) of bgp_path_info's was created. f) This linklist was passed in to a comparison function that took the old mpinfo list and compared it item by item to the linklist and doing magic to figure out how to create a new mp_info list. g) the old mp_info and the link list had to be memory managed and freed up. h) BGP_PATH_MULTIPATH is only set on non bestpath nodes in the multipath. This is really complicated. Let's change the algorithm to this: a) When running bestpath, mark a bgp_path_info node that could be in the ecmp path as BGP_PATH_MULTIPATH_NEW. b) When running multipath, just walk the list of bgp_path_info's and if it has BGP_PATH_MULTIPATH_NEW on it, decide if it is in BGP_MULTIPATH. If we run out of space to put in the ecmp, clear the flag on the rest. c) Clean up the counting of sometimes adding 1 to the mpath count. d) Only allocate a mpath_info node for the bestpath. Clean it up when done with it. e) remove the unneeded list management associated with the linklist and the mp_list. This greatly simplifies multipath computation for bgp and reduces memory load for large scale deployments. 2 full feeds in work_queue_run prior: 0 56367.471 1123 50193 493695 50362 493791 0 0 0 TE work_queue_run BGP multipath info : 1941844 48 110780992 1941844 110780992 2 full feeds in work_queue_run after change: 1 52924.931 1296 40837 465968 41025 487390 0 0 1 TE work_queue_run BGP multipath info : 970860 32 38836880 970866 38837120 Aproximately 4 seconds of saved cpu time for convergence and ~75 mb smaller run time. Signed-off-by: Donald Sharp --- bgpd/bgp_mpath.c | 427 +++++++++------------------------- bgpd/bgp_mpath.h | 15 +- bgpd/bgp_route.c | 21 +- bgpd/bgp_route.h | 14 ++ bgpd/bgp_routemap.c | 2 +- tests/bgpd/subdir.am | 11 - tests/bgpd/test_mpath.c | 482 --------------------------------------- tests/bgpd/test_mpath.py | 10 - 8 files changed, 134 insertions(+), 848 deletions(-) delete mode 100644 tests/bgpd/test_mpath.c delete mode 100644 tests/bgpd/test_mpath.py diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c index beac1e97b28e..e27b78977708 100644 --- a/bgpd/bgp_mpath.c +++ b/bgpd/bgp_mpath.c @@ -2,8 +2,10 @@ /* * BGP Multipath * Copyright (C) 2010 Google Inc. + * 2024 Nvidia Corporation + * Donald Sharp * - * This file is part of Quagga + * This file is part of FRR */ #include @@ -190,78 +192,6 @@ int bgp_path_info_nexthop_cmp(struct bgp_path_info *bpi1, return compare; } -/* - * bgp_path_info_mpath_cmp - * - * This function determines our multipath list ordering. By ordering - * the list we can deterministically select which paths are included - * in the multipath set. The ordering also helps in detecting changes - * in the multipath selection so we can detect whether to send an - * update to zebra. - * - * The order of paths is determined first by received nexthop, and then - * by peer address if the nexthops are the same. - */ -static int bgp_path_info_mpath_cmp(void *val1, void *val2) -{ - struct bgp_path_info *bpi1, *bpi2; - int compare; - - bpi1 = val1; - bpi2 = val2; - - compare = bgp_path_info_nexthop_cmp(bpi1, bpi2); - - if (!compare) { - if (!bpi1->peer->su_remote && !bpi2->peer->su_remote) - compare = 0; - else if (!bpi1->peer->su_remote) - compare = 1; - else if (!bpi2->peer->su_remote) - compare = -1; - else - compare = sockunion_cmp(bpi1->peer->su_remote, - bpi2->peer->su_remote); - } - - return compare; -} - -/* - * bgp_mp_list_init - * - * Initialize the mp_list, which holds the list of multipaths - * selected by bgp_best_selection - */ -void bgp_mp_list_init(struct list *mp_list) -{ - assert(mp_list); - memset(mp_list, 0, sizeof(struct list)); - mp_list->cmp = bgp_path_info_mpath_cmp; -} - -/* - * bgp_mp_list_clear - * - * Clears all entries out of the mp_list - */ -void bgp_mp_list_clear(struct list *mp_list) -{ - assert(mp_list); - list_delete_all_node(mp_list); -} - -/* - * bgp_mp_list_add - * - * Adds a multipath entry to the mp_list - */ -void bgp_mp_list_add(struct list *mp_list, struct bgp_path_info *mpinfo) -{ - assert(mp_list && mpinfo); - listnode_add_sort(mp_list, mpinfo); -} - /* * bgp_path_info_mpath_new * @@ -274,6 +204,7 @@ static struct bgp_path_info_mpath *bgp_path_info_mpath_new(void) new_mpath = XCALLOC(MTYPE_BGP_MPATH_INFO, sizeof(struct bgp_path_info_mpath)); + new_mpath->mp_count = 1; return new_mpath; } @@ -287,6 +218,8 @@ void bgp_path_info_mpath_free(struct bgp_path_info_mpath **mpath) if (mpath && *mpath) { if ((*mpath)->mp_attr) bgp_attr_unintern(&(*mpath)->mp_attr); + (*mpath)->mp_attr = NULL; + XFREE(MTYPE_BGP_MPATH_INFO, *mpath); } } @@ -313,31 +246,6 @@ bgp_path_info_mpath_get(struct bgp_path_info *path) return path->mpath; } -/* - * bgp_path_info_mpath_enqueue - * - * Enqueue a path onto the multipath list given the previous multipath - * list entry - */ -static void bgp_path_info_mpath_enqueue(struct bgp_path_info *prev_info, - struct bgp_path_info *path) -{ - struct bgp_path_info_mpath *prev, *mpath; - - prev = bgp_path_info_mpath_get(prev_info); - mpath = bgp_path_info_mpath_get(path); - if (!prev || !mpath) - return; - - mpath->mp_nexta = prev->mp_nexta; - mpath->mp_prev = prev; - if (prev->mp_nexta) - prev->mp_nexta->mp_prev = mpath; - prev->mp_nexta = mpath; - - SET_FLAG(path->flags, BGP_PATH_MULTIPATH); -} - /* * bgp_path_info_mpath_dequeue * @@ -346,14 +254,9 @@ static void bgp_path_info_mpath_enqueue(struct bgp_path_info *prev_info, void bgp_path_info_mpath_dequeue(struct bgp_path_info *path) { struct bgp_path_info_mpath *mpath = path->mpath; + if (!mpath) return; - if (mpath->mp_prev) - mpath->mp_prev->mp_nexta = mpath->mp_nexta; - if (mpath->mp_nexta) - mpath->mp_nexta->mp_prev = mpath->mp_prev; - mpath->mp_nexta = mpath->mp_prev = NULL; - UNSET_FLAG(path->flags, BGP_PATH_MULTIPATH); } /* @@ -363,9 +266,16 @@ void bgp_path_info_mpath_dequeue(struct bgp_path_info *path) */ struct bgp_path_info *bgp_path_info_mpath_next(struct bgp_path_info *path) { - if (!path->mpath || !path->mpath->mp_nexta) - return NULL; - return path->mpath->mp_nexta->mp_info; + path = path->next; + + while (path) { + if (CHECK_FLAG(path->flags, BGP_PATH_MULTIPATH)) + return path; + + path = path->next; + } + + return NULL; } /* @@ -386,7 +296,8 @@ struct bgp_path_info *bgp_path_info_mpath_first(struct bgp_path_info *path) uint32_t bgp_path_info_mpath_count(struct bgp_path_info *path) { if (!path->mpath) - return 0; + return 1; + return path->mpath->mp_count; } @@ -515,58 +426,51 @@ static void bgp_path_info_mpath_attr_set(struct bgp_path_info *path, /* * bgp_path_info_mpath_update * - * Compare and sync up the multipath list with the mp_list generated by - * bgp_best_selection + * Compare and sync up the multipath flags with what was choosen + * in best selection */ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, - struct bgp_path_info *new_best, - struct bgp_path_info *old_best, - struct list *mp_list, - struct bgp_maxpaths_cfg *mpath_cfg) + struct bgp_path_info *new_best, struct bgp_path_info *old_best, + uint32_t num_candidates, struct bgp_maxpaths_cfg *mpath_cfg) { uint16_t maxpaths, mpath_count, old_mpath_count; uint64_t bwval; uint64_t cum_bw, old_cum_bw; - struct listnode *mp_node, *mp_next_node; - struct bgp_path_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath; - int mpath_changed, debug; + struct bgp_path_info *cur_iterator = NULL; + bool mpath_changed, debug; bool all_paths_lb; char path_buf[PATH_ADDPATH_STR_BUFFER]; + bool old_mpath, new_mpath; - mpath_changed = 0; + mpath_changed = false; maxpaths = multipath_num; mpath_count = 0; - cur_mpath = NULL; old_mpath_count = 0; old_cum_bw = cum_bw = 0; - prev_mpath = new_best; - mp_node = listhead(mp_list); debug = bgp_debug_bestpath(dest); - if (new_best) { - mpath_count++; - if (new_best != old_best) - bgp_path_info_mpath_dequeue(new_best); - maxpaths = (new_best->peer->sort == BGP_PEER_IBGP) - ? mpath_cfg->maxpaths_ibgp - : mpath_cfg->maxpaths_ebgp; - } - if (old_best) { - cur_mpath = bgp_path_info_mpath_first(old_best); old_mpath_count = bgp_path_info_mpath_count(old_best); + if (old_mpath_count == 1) + SET_FLAG(old_best->flags, BGP_PATH_MULTIPATH); old_cum_bw = bgp_path_info_mpath_cumbw(old_best); bgp_path_info_mpath_count_set(old_best, 0); bgp_path_info_mpath_lb_update(old_best, false, false, 0); - bgp_path_info_mpath_dequeue(old_best); + bgp_path_info_mpath_free(&old_best->mpath); + old_best->mpath = NULL; + } + + if (new_best) { + maxpaths = (new_best->peer->sort == BGP_PEER_IBGP) ? mpath_cfg->maxpaths_ibgp + : mpath_cfg->maxpaths_ebgp; + cur_iterator = new_best; } if (debug) - zlog_debug("%pBD(%s): starting mpath update, newbest %s num candidates %d old-mpath-count %d old-cum-bw %" PRIu64, - dest, bgp->name_pretty, - new_best ? new_best->peer->host : "NONE", - mp_list ? listcount(mp_list) : 0, old_mpath_count, - old_cum_bw); + zlog_debug("%pBD(%s): starting mpath update, newbest %s num candidates %d old-mpath-count %d old-cum-bw %" PRIu64 + " maxpaths set %u", + dest, bgp->name_pretty, new_best ? new_best->peer->host : "NONE", + num_candidates, old_mpath_count, old_cum_bw, maxpaths); /* * We perform an ordered walk through both lists in parallel. @@ -580,210 +484,100 @@ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, * to skip over it */ all_paths_lb = true; /* We'll reset if any path doesn't have LB. */ - while (mp_node || cur_mpath) { - struct bgp_path_info *tmp_info; + while (cur_iterator) { + old_mpath = CHECK_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + new_mpath = CHECK_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH_NEW); + + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH_NEW); /* - * We can bail out of this loop if all existing paths on the - * multipath list have been visited (for cleanup purposes) and - * the maxpath requirement is fulfulled + * If the current mpath count is equal to the number of + * maxpaths that can be used then we can bail, after + * we clean up the flags associated with the rest of the + * bestpaths */ - if (!cur_mpath && (mpath_count >= maxpaths)) + if (mpath_count >= maxpaths) { + while (cur_iterator) { + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH_NEW); + + cur_iterator = cur_iterator->next; + } break; - mp_next_node = mp_node ? listnextnode(mp_node) : NULL; - next_mpath = - cur_mpath ? bgp_path_info_mpath_next(cur_mpath) : NULL; - tmp_info = mp_node ? listgetdata(mp_node) : NULL; + if (debug) + zlog_debug("%pBD(%s): Mpath count %u is equal to maximum paths allowed, finished comparision for MPATHS", + dest, bgp->name_pretty, mpath_count); + } if (debug) - zlog_debug("%pBD(%s): comparing candidate %s with existing mpath %s", - dest, bgp->name_pretty, - tmp_info ? tmp_info->peer->host : "NONE", - cur_mpath ? cur_mpath->peer->host : "NONE"); - + zlog_debug("%pBD(%s): Candidate %s old_mpath: %u new_mpath: %u, Nexthop %pI4 current mpath count: %u", + dest, bgp->name_pretty, cur_iterator->peer->host, old_mpath, + new_mpath, &cur_iterator->attr->nexthop, mpath_count); /* - * If equal, the path was a multipath and is still a multipath. - * Insert onto new multipath list if maxpaths allows. + * There is nothing to do if the cur_iterator is neither a old path + * or a new path */ - if (mp_node && (listgetdata(mp_node) == cur_mpath)) { - list_delete_node(mp_list, mp_node); - bgp_path_info_mpath_dequeue(cur_mpath); - if ((mpath_count < maxpaths) && prev_mpath) { - mpath_count++; - if (bgp_path_info_nexthop_cmp(prev_mpath, - cur_mpath)) { - if (ecommunity_linkbw_present( - bgp_attr_get_ecommunity( - cur_mpath->attr), - &bwval) || - ecommunity_linkbw_present( - bgp_attr_get_ipv6_ecommunity( - cur_mpath->attr), - &bwval)) - cum_bw += bwval; - else - all_paths_lb = false; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: %s is still multipath, cur count %d", - dest, path_buf, - mpath_count); - } - } else { - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: nexthop equal, however add mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &cur_mpath->attr->nexthop, - mpath_count); - } - } - bgp_path_info_mpath_enqueue(prev_mpath, - cur_mpath); - prev_mpath = cur_mpath; - } else { - mpath_changed = 1; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: remove mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &cur_mpath->attr->nexthop, - mpath_count); - } - } - mp_node = mp_next_node; - cur_mpath = next_mpath; + if (!old_mpath && !new_mpath) { + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + cur_iterator = cur_iterator->next; continue; } - if (cur_mpath - && (!mp_node - || (bgp_path_info_mpath_cmp(cur_mpath, - listgetdata(mp_node)) - < 0))) { - /* - * If here, we have an old multipath and either the - * mp_list - * is finished or the next mp_node points to a later - * multipath, so we need to purge this path from the - * multipath list - */ - bgp_path_info_mpath_dequeue(cur_mpath); - mpath_changed = 1; + if (new_mpath) { + mpath_count++; + + if (cur_iterator != new_best) + SET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + + if (!old_mpath) + mpath_changed = true; + + if (ecommunity_linkbw_present(bgp_attr_get_ecommunity(cur_iterator->attr), + &bwval) || + ecommunity_linkbw_present(bgp_attr_get_ipv6_ecommunity( + cur_iterator->attr), + &bwval)) + cum_bw += bwval; + else + all_paths_lb = false; + if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, sizeof(path_buf)); - zlog_debug("%pBD: remove mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &cur_mpath->attr->nexthop, - mpath_count); + bgp_path_info_path_with_addpath_rx_str(cur_iterator, path_buf, + sizeof(path_buf)); + zlog_debug("%pBD: add mpath %s nexthop %pI4, cur count %d cum_bw: %" PRIu64 + " all_paths_lb: %u", + dest, path_buf, &cur_iterator->attr->nexthop, + mpath_count, cum_bw, all_paths_lb); } - cur_mpath = next_mpath; } else { /* - * If here, we have a path on the mp_list that was not - * previously - * a multipath (due to non-equivalance or maxpaths - * exceeded), - * or the matching multipath is sorted later in the - * multipath - * list. Before we enqueue the path on the new multipath - * list, - * make sure its not on the old_best multipath list or - * referenced - * via next_mpath: - * - If next_mpath points to this new path, update - * next_mpath to - * point to the multipath after this one - * - Dequeue the path from the multipath list just to - * make sure + * We know that old_mpath is true and new_mpath is false in this path */ - new_mpath = listgetdata(mp_node); - list_delete_node(mp_list, mp_node); - assert(new_mpath); - assert(prev_mpath); - if ((mpath_count < maxpaths) && (new_mpath != new_best)) { - /* keep duplicate nexthop */ - bgp_path_info_mpath_dequeue(new_mpath); - - bgp_path_info_mpath_enqueue(prev_mpath, - new_mpath); - mpath_changed = 1; - mpath_count++; - if (bgp_path_info_nexthop_cmp(prev_mpath, - new_mpath)) { - if (ecommunity_linkbw_present( - bgp_attr_get_ecommunity( - new_mpath->attr), - &bwval) || - ecommunity_linkbw_present( - bgp_attr_get_ipv6_ecommunity( - new_mpath->attr), - &bwval)) - cum_bw += bwval; - else - all_paths_lb = false; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - new_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: add mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &new_mpath->attr - ->nexthop, - mpath_count); - } - } else { - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - new_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: nexthop equal, however add mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &new_mpath->attr - ->nexthop, - mpath_count); - } - } - prev_mpath = new_mpath; - } - mp_node = mp_next_node; + mpath_changed = true; + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); } + + cur_iterator = cur_iterator->next; } if (new_best) { - bgp_path_info_mpath_count_set(new_best, mpath_count - 1); - if (mpath_count <= 1 || - (!ecommunity_linkbw_present(bgp_attr_get_ecommunity( - new_best->attr), - &bwval) && - !ecommunity_linkbw_present(bgp_attr_get_ipv6_ecommunity( - new_best->attr), - &bwval))) - all_paths_lb = false; - else - cum_bw += bwval; - bgp_path_info_mpath_lb_update(new_best, true, - all_paths_lb, cum_bw); - + if (mpath_count > 1 || new_best->mpath) { + bgp_path_info_mpath_count_set(new_best, mpath_count); + bgp_path_info_mpath_lb_update(new_best, true, all_paths_lb, cum_bw); + } if (debug) zlog_debug("%pBD(%s): New mpath count (incl newbest) %d mpath-change %s all_paths_lb %d cum_bw %" PRIu64, dest, bgp->name_pretty, mpath_count, mpath_changed ? "YES" : "NO", all_paths_lb, cum_bw); + if (mpath_count == 1) + UNSET_FLAG(new_best->flags, BGP_PATH_MULTIPATH); if (mpath_changed || (bgp_path_info_mpath_count(new_best) != old_mpath_count)) SET_FLAG(new_best->flags, BGP_PATH_MULTIPATH_CHG); - if ((mpath_count - 1) != old_mpath_count || - old_cum_bw != cum_bw) + if ((mpath_count) != old_mpath_count || old_cum_bw != cum_bw) SET_FLAG(new_best->flags, BGP_PATH_LINK_BW_CHG); } } @@ -796,20 +590,13 @@ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, */ void bgp_mp_dmed_deselect(struct bgp_path_info *dmed_best) { - struct bgp_path_info *mpinfo, *mpnext; - if (!dmed_best) return; - for (mpinfo = bgp_path_info_mpath_first(dmed_best); mpinfo; - mpinfo = mpnext) { - mpnext = bgp_path_info_mpath_next(mpinfo); - bgp_path_info_mpath_dequeue(mpinfo); - } - bgp_path_info_mpath_count_set(dmed_best, 0); UNSET_FLAG(dmed_best->flags, BGP_PATH_MULTIPATH_CHG); UNSET_FLAG(dmed_best->flags, BGP_PATH_LINK_BW_CHG); + assert(bgp_path_info_mpath_first(dmed_best) == NULL); } @@ -847,7 +634,7 @@ void bgp_path_info_mpath_aggregate_update(struct bgp_path_info *new_best, if (!new_best) return; - if (!bgp_path_info_mpath_count(new_best)) { + if (bgp_path_info_mpath_count(new_best) == 1) { if ((new_attr = bgp_path_info_mpath_attr(new_best))) { bgp_attr_unintern(&new_attr); bgp_path_info_mpath_attr_set(new_best, NULL); diff --git a/bgpd/bgp_mpath.h b/bgpd/bgp_mpath.h index 8034e25e3f7c..330a3f893971 100644 --- a/bgpd/bgp_mpath.h +++ b/bgpd/bgp_mpath.h @@ -2,8 +2,9 @@ /* * BGP Multipath * Copyright (C) 2010 Google Inc. + * 2024 Nvidia Corporation * - * This file is part of Quagga + * This file is part of FRR */ #ifndef _FRR_BGP_MPATH_H @@ -13,12 +14,6 @@ * multipath selections, lazily allocated to save memory */ struct bgp_path_info_mpath { - /* Points to the first multipath (on bestpath) or the next multipath */ - struct bgp_path_info_mpath *mp_nexta; - - /* Points to the previous multipath or NULL on bestpath */ - struct bgp_path_info_mpath *mp_prev; - /* Points to bgp_path_info associated with this multipath info */ struct bgp_path_info *mp_info; @@ -51,14 +46,10 @@ extern int bgp_maximum_paths_unset(struct bgp *bgp, afi_t afi, safi_t safi, */ extern int bgp_path_info_nexthop_cmp(struct bgp_path_info *bpi1, struct bgp_path_info *bpi2); -extern void bgp_mp_list_init(struct list *mp_list); -extern void bgp_mp_list_clear(struct list *mp_list); -extern void bgp_mp_list_add(struct list *mp_list, struct bgp_path_info *mpinfo); extern void bgp_mp_dmed_deselect(struct bgp_path_info *dmed_best); extern void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, struct bgp_path_info *new_best, - struct bgp_path_info *old_best, - struct list *mp_list, + struct bgp_path_info *old_best, uint32_t num_candidates, struct bgp_maxpaths_cfg *mpath_cfg); extern void bgp_path_info_mpath_aggregate_update(struct bgp_path_info *new_best, diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index f28c9adda218..00d128557009 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -2173,8 +2173,7 @@ bool subgroup_announce_check(struct bgp_dest *dest, struct bgp_path_info *pi, from = pi->peer; filter = &peer->filter[afi][safi]; bgp = SUBGRP_INST(subgrp); - piattr = bgp_path_info_mpath_count(pi) ? bgp_path_info_mpath_attr(pi) - : pi->attr; + piattr = bgp_path_info_mpath_count(pi) > 1 ? bgp_path_info_mpath_attr(pi) : pi->attr; if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX_OUT) && peer->pmax_out[afi][safi] != 0 && @@ -2854,13 +2853,12 @@ void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest, struct bgp_path_info *pi2; int paths_eq, do_mpath; bool debug, any_comparisons; - struct list mp_list; char pfx_buf[PREFIX2STR_BUFFER] = {}; char path_buf[PATH_ADDPATH_STR_BUFFER]; enum bgp_path_selection_reason reason = bgp_path_selection_none; bool unsorted_items = true; + uint32_t num_candidates = 0; - bgp_mp_list_init(&mp_list); do_mpath = (mpath_cfg->maxpaths_ebgp > 1 || mpath_cfg->maxpaths_ibgp > 1); @@ -3235,7 +3233,8 @@ void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest, "%pBD(%s): %s is the bestpath, add to the multipath list", dest, bgp->name_pretty, path_buf); - bgp_mp_list_add(&mp_list, pi); + SET_FLAG(pi->flags, BGP_PATH_MULTIPATH_NEW); + num_candidates++; continue; } @@ -3258,15 +3257,14 @@ void bgp_best_selection(struct bgp *bgp, struct bgp_dest *dest, "%pBD(%s): %s is equivalent to the bestpath, add to the multipath list", dest, bgp->name_pretty, path_buf); - bgp_mp_list_add(&mp_list, pi); + SET_FLAG(pi->flags, BGP_PATH_MULTIPATH_NEW); + num_candidates++; } } } - bgp_path_info_mpath_update(bgp, dest, new_select, old_select, &mp_list, - mpath_cfg); + bgp_path_info_mpath_update(bgp, dest, new_select, old_select, num_candidates, mpath_cfg); bgp_path_info_mpath_aggregate_update(new_select, old_select); - bgp_mp_list_clear(&mp_list); bgp_addpath_update_ids(bgp, dest, afi, safi); @@ -11189,9 +11187,8 @@ void route_vty_out_detail(struct vty *vty, struct bgp *bgp, struct bgp_dest *bn, vty_out(vty, ", otc %u", attr->otc); } - if (CHECK_FLAG(path->flags, BGP_PATH_MULTIPATH) - || (CHECK_FLAG(path->flags, BGP_PATH_SELECTED) - && bgp_path_info_mpath_count(path))) { + if (CHECK_FLAG(path->flags, BGP_PATH_MULTIPATH) || + (CHECK_FLAG(path->flags, BGP_PATH_SELECTED) && bgp_path_info_mpath_count(path) > 1)) { if (json_paths) json_object_boolean_true_add(json_path, "multipath"); else diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index b6df2411812b..d71bfd3ebc62 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -313,6 +313,11 @@ struct bgp_path_info { #define BGP_PATH_STALE (1 << 8) #define BGP_PATH_REMOVED (1 << 9) #define BGP_PATH_COUNTED (1 << 10) +/* + * A BGP_PATH_MULTIPATH flag is not set on the best path + * it is set on every other node that is part of ECMP + * for that particular dest + */ #define BGP_PATH_MULTIPATH (1 << 11) #define BGP_PATH_MULTIPATH_CHG (1 << 12) #define BGP_PATH_RIB_ATTR_CHG (1 << 13) @@ -322,6 +327,15 @@ struct bgp_path_info { #define BGP_PATH_MPLSVPN_LABEL_NH (1 << 17) #define BGP_PATH_MPLSVPN_NH_LABEL_BIND (1 << 18) #define BGP_PATH_UNSORTED (1 << 19) +/* + * BGP_PATH_MULTIPATH_NEW is set on those bgp_path_info + * nodes that we have decided should possibly be in the + * ecmp path for a particular dest. This flag is + * removed when the bgp_path_info's are looked at to + * decide on whether or not a bgp_path_info is on + * the actual ecmp path. + */ +#define BGP_PATH_MULTIPATH_NEW (1 << 20) /* BGP route type. This can be static, RIP, OSPF, BGP etc. */ uint8_t type; diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c index ec60e5db86a3..583b9e7980f5 100644 --- a/bgpd/bgp_routemap.c +++ b/bgpd/bgp_routemap.c @@ -3220,7 +3220,7 @@ route_set_ecommunity_lb(void *rule, const struct prefix *prefix, void *object) return RMAP_OKAY; bw_bytes = (peer->bgp->lb_ref_bw * 1000 * 1000) / 8; - mpath_count = bgp_path_info_mpath_count(path) + 1; + mpath_count = bgp_path_info_mpath_count(path); bw_bytes *= mpath_count; } diff --git a/tests/bgpd/subdir.am b/tests/bgpd/subdir.am index 5148e7e4408a..97845ec1aa8b 100644 --- a/tests/bgpd/subdir.am +++ b/tests/bgpd/subdir.am @@ -52,17 +52,6 @@ tests_bgpd_test_mp_attr_LDADD = $(BGP_TEST_LDADD) tests_bgpd_test_mp_attr_SOURCES = tests/bgpd/test_mp_attr.c EXTRA_DIST += tests/bgpd/test_mp_attr.py - -if BGPD -check_PROGRAMS += tests/bgpd/test_mpath -endif -tests_bgpd_test_mpath_CFLAGS = $(TESTS_CFLAGS) -tests_bgpd_test_mpath_CPPFLAGS = $(TESTS_CPPFLAGS) -tests_bgpd_test_mpath_LDADD = $(BGP_TEST_LDADD) -tests_bgpd_test_mpath_SOURCES = tests/bgpd/test_mpath.c -EXTRA_DIST += tests/bgpd/test_mpath.py - - if BGPD check_PROGRAMS += tests/bgpd/test_packet endif diff --git a/tests/bgpd/test_mpath.c b/tests/bgpd/test_mpath.c deleted file mode 100644 index ebbe3ac3e28f..000000000000 --- a/tests/bgpd/test_mpath.c +++ /dev/null @@ -1,482 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * BGP Multipath Unit Test - * Copyright (C) 2010 Google Inc. - * - * This file is part of Quagga - */ - -#include - -#include "qobj.h" -#include "vty.h" -#include "stream.h" -#include "privs.h" -#include "linklist.h" -#include "memory.h" -#include "zclient.h" -#include "queue.h" -#include "filter.h" - -#include "bgpd/bgpd.h" -#include "bgpd/bgp_table.h" -#include "bgpd/bgp_route.h" -#include "bgpd/bgp_attr.h" -#include "bgpd/bgp_nexthop.h" -#include "bgpd/bgp_mpath.h" -#include "bgpd/bgp_evpn.h" -#include "bgpd/bgp_network.h" - -#define VT100_RESET "\x1b[0m" -#define VT100_RED "\x1b[31m" -#define VT100_GREEN "\x1b[32m" -#define VT100_YELLOW "\x1b[33m" -#define OK VT100_GREEN "OK" VT100_RESET -#define FAILED VT100_RED "failed" VT100_RESET - -#define TEST_PASSED 0 -#define TEST_FAILED -1 - -#define EXPECT_TRUE(expr, res) \ - if (!(expr)) { \ - printf("Test failure in %s line %u: %s\n", __func__, __LINE__, \ - #expr); \ - (res) = TEST_FAILED; \ - } - -typedef struct testcase_t__ testcase_t; - -typedef int (*test_setup_func)(testcase_t *); -typedef int (*test_run_func)(testcase_t *); -typedef int (*test_cleanup_func)(testcase_t *); - -struct testcase_t__ { - const char *desc; - void *test_data; - void *verify_data; - void *tmp_data; - test_setup_func setup; - test_run_func run; - test_cleanup_func cleanup; -}; - -/* need these to link in libbgp */ -struct event_loop *master = NULL; -extern struct zclient *zclient; -struct zebra_privs_t bgpd_privs = { - .user = NULL, - .group = NULL, - .vty_group = NULL, -}; - -static int tty = 0; - -/* Create fake bgp instance */ -static struct bgp *bgp_create_fake(as_t *as, const char *name) -{ - struct bgp *bgp; - afi_t afi; - safi_t safi; - - if ((bgp = XCALLOC(MTYPE_BGP, sizeof(struct bgp))) == NULL) - return NULL; - - bgp_lock(bgp); - // bgp->peer_self = peer_new (bgp); - // bgp->peer_self->host = XSTRDUP (MTYPE_BGP_PEER_HOST, "Static - // announcement"); - - bgp->peer = list_new(); - // bgp->peer->cmp = (int (*)(void *, void *)) peer_cmp; - - bgp->group = list_new(); - // bgp->group->cmp = (int (*)(void *, void *)) peer_group_cmp; - - bgp_evpn_init(bgp); - FOREACH_AFI_SAFI (afi, safi) { - bgp->route[afi][safi] = bgp_table_init(bgp, afi, safi); - bgp->aggregate[afi][safi] = bgp_table_init(bgp, afi, safi); - bgp->rib[afi][safi] = bgp_table_init(bgp, afi, safi); - bgp->maxpaths[afi][safi].maxpaths_ebgp = MULTIPATH_NUM; - bgp->maxpaths[afi][safi].maxpaths_ibgp = MULTIPATH_NUM; - } - - bgp_scan_init(bgp); - bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; - bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; - bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; - bgp->restart_time = BGP_DEFAULT_RESTART_TIME; - bgp->stalepath_time = BGP_DEFAULT_STALEPATH_TIME; - - bgp->as = *as; - - if (name) - bgp->name = strdup(name); - - return bgp; -} - -/*========================================================= - * Testcase for maximum-paths configuration - */ -static int setup_bgp_cfg_maximum_paths(testcase_t *t) -{ - as_t asn = 1; - t->tmp_data = bgp_create_fake(&asn, NULL); - if (!t->tmp_data) - return -1; - return 0; -} - -static int run_bgp_cfg_maximum_paths(testcase_t *t) -{ - afi_t afi; - safi_t safi; - struct bgp *bgp; - int api_result; - int test_result = TEST_PASSED; - - bgp = t->tmp_data; - FOREACH_AFI_SAFI (afi, safi) { - /* test bgp_maximum_paths_set */ - api_result = bgp_maximum_paths_set(bgp, afi, safi, - BGP_PEER_EBGP, 10, 0); - EXPECT_TRUE(api_result == 0, test_result); - api_result = bgp_maximum_paths_set(bgp, afi, safi, - BGP_PEER_IBGP, 10, 0); - EXPECT_TRUE(api_result == 0, test_result); - EXPECT_TRUE(bgp->maxpaths[afi][safi].maxpaths_ebgp == 10, - test_result); - EXPECT_TRUE(bgp->maxpaths[afi][safi].maxpaths_ibgp == 10, - test_result); - - /* test bgp_maximum_paths_unset */ - api_result = - bgp_maximum_paths_unset(bgp, afi, safi, BGP_PEER_EBGP); - EXPECT_TRUE(api_result == 0, test_result); - api_result = - bgp_maximum_paths_unset(bgp, afi, safi, BGP_PEER_IBGP); - EXPECT_TRUE(api_result == 0, test_result); - EXPECT_TRUE((bgp->maxpaths[afi][safi].maxpaths_ebgp - == MULTIPATH_NUM), - test_result); - EXPECT_TRUE((bgp->maxpaths[afi][safi].maxpaths_ibgp - == MULTIPATH_NUM), - test_result); - } - - return test_result; -} - -static int cleanup_bgp_cfg_maximum_paths(testcase_t *t) -{ - return bgp_delete((struct bgp *)t->tmp_data); -} - -testcase_t test_bgp_cfg_maximum_paths = { - .desc = "Test bgp maximum-paths config", - .setup = setup_bgp_cfg_maximum_paths, - .run = run_bgp_cfg_maximum_paths, - .cleanup = cleanup_bgp_cfg_maximum_paths, -}; - -/*========================================================= - * Testcase for bgp_mp_list - */ -struct peer test_mp_list_peer[] = { - {.local_as = 1, .as = 2}, {.local_as = 1, .as = 2}, - {.local_as = 1, .as = 2}, {.local_as = 1, .as = 2}, - {.local_as = 1, .as = 2}, -}; -int test_mp_list_peer_count = array_size(test_mp_list_peer); -struct attr test_mp_list_attr[4]; -struct bgp_path_info test_mp_list_info[] = { - {.peer = &test_mp_list_peer[0], .attr = &test_mp_list_attr[0]}, - {.peer = &test_mp_list_peer[1], .attr = &test_mp_list_attr[1]}, - {.peer = &test_mp_list_peer[2], .attr = &test_mp_list_attr[1]}, - {.peer = &test_mp_list_peer[3], .attr = &test_mp_list_attr[2]}, - {.peer = &test_mp_list_peer[4], .attr = &test_mp_list_attr[3]}, -}; -int test_mp_list_info_count = array_size(test_mp_list_info); - -static int setup_bgp_mp_list(testcase_t *t) -{ - test_mp_list_attr[0].nexthop.s_addr = 0x01010101; - test_mp_list_attr[1].nexthop.s_addr = 0x02020202; - test_mp_list_attr[2].nexthop.s_addr = 0x03030303; - test_mp_list_attr[3].nexthop.s_addr = 0x04040404; - - if ((test_mp_list_peer[0].su_remote = sockunion_str2su("1.1.1.1")) - == NULL) - return -1; - if ((test_mp_list_peer[1].su_remote = sockunion_str2su("2.2.2.2")) - == NULL) - return -1; - if ((test_mp_list_peer[2].su_remote = sockunion_str2su("3.3.3.3")) - == NULL) - return -1; - if ((test_mp_list_peer[3].su_remote = sockunion_str2su("4.4.4.4")) - == NULL) - return -1; - if ((test_mp_list_peer[4].su_remote = sockunion_str2su("5.5.5.5")) - == NULL) - return -1; - - return 0; -} - -static int run_bgp_mp_list(testcase_t *t) -{ - struct list mp_list; - struct listnode *mp_node; - struct bgp_path_info *info; - int i; - int test_result = TEST_PASSED; - bgp_mp_list_init(&mp_list); - EXPECT_TRUE(listcount(&mp_list) == 0, test_result); - - bgp_mp_list_add(&mp_list, &test_mp_list_info[1]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[4]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[2]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[3]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[0]); - - for (i = 0, mp_node = mp_list.head; i < test_mp_list_info_count; - i++, mp_node = listnextnode(mp_node)) { - info = listgetdata(mp_node); - info->lock++; - EXPECT_TRUE(info == &test_mp_list_info[i], test_result); - } - - bgp_mp_list_clear(&mp_list); - EXPECT_TRUE(listcount(&mp_list) == 0, test_result); - - return test_result; -} - -static int cleanup_bgp_mp_list(testcase_t *t) -{ - int i; - - for (i = 0; i < test_mp_list_peer_count; i++) - sockunion_free(test_mp_list_peer[i].su_remote); - - return 0; -} - -testcase_t test_bgp_mp_list = { - .desc = "Test bgp_mp_list", - .setup = setup_bgp_mp_list, - .run = run_bgp_mp_list, - .cleanup = cleanup_bgp_mp_list, -}; - -/*========================================================= - * Testcase for bgp_path_info_mpath_update - */ - -static struct bgp_dest *dest; - -static int setup_bgp_path_info_mpath_update(testcase_t *t) -{ - int i; - struct bgp *bgp; - struct bgp_table *rt; - struct prefix p; - as_t asn = 1; - - t->tmp_data = bgp_create_fake(&asn, NULL); - if (!t->tmp_data) - return -1; - - bgp = t->tmp_data; - rt = bgp->rib[AFI_IP][SAFI_UNICAST]; - - if (!rt) - return -1; - - str2prefix("42.1.1.0/24", &p); - dest = bgp_node_get(rt, &p); - - setup_bgp_mp_list(t); - for (i = 0; i < test_mp_list_info_count; i++) - bgp_path_info_add(dest, &test_mp_list_info[i]); - return 0; -} - -static int run_bgp_path_info_mpath_update(testcase_t *t) -{ - struct bgp_path_info *new_best, *old_best, *mpath; - struct list mp_list; - struct bgp_maxpaths_cfg mp_cfg = {3, 3}; - - int test_result = TEST_PASSED; - bgp_mp_list_init(&mp_list); - bgp_mp_list_add(&mp_list, &test_mp_list_info[4]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[3]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[0]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[1]); - new_best = &test_mp_list_info[3]; - old_best = NULL; - bgp_path_info_mpath_update(NULL, dest, new_best, old_best, &mp_list, - &mp_cfg); - bgp_mp_list_clear(&mp_list); - EXPECT_TRUE(bgp_path_info_mpath_count(new_best) == 2, test_result); - mpath = bgp_path_info_mpath_first(new_best); - EXPECT_TRUE(mpath == &test_mp_list_info[0], test_result); - EXPECT_TRUE(CHECK_FLAG(mpath->flags, BGP_PATH_MULTIPATH), test_result); - mpath = bgp_path_info_mpath_next(mpath); - EXPECT_TRUE(mpath == &test_mp_list_info[1], test_result); - EXPECT_TRUE(CHECK_FLAG(mpath->flags, BGP_PATH_MULTIPATH), test_result); - - bgp_mp_list_add(&mp_list, &test_mp_list_info[0]); - bgp_mp_list_add(&mp_list, &test_mp_list_info[1]); - new_best = &test_mp_list_info[0]; - old_best = &test_mp_list_info[3]; - bgp_path_info_mpath_update(NULL, dest, new_best, old_best, &mp_list, - &mp_cfg); - bgp_mp_list_clear(&mp_list); - EXPECT_TRUE(bgp_path_info_mpath_count(new_best) == 1, test_result); - mpath = bgp_path_info_mpath_first(new_best); - EXPECT_TRUE(mpath == &test_mp_list_info[1], test_result); - EXPECT_TRUE(CHECK_FLAG(mpath->flags, BGP_PATH_MULTIPATH), test_result); - EXPECT_TRUE(!CHECK_FLAG(test_mp_list_info[0].flags, BGP_PATH_MULTIPATH), - test_result); - - return test_result; -} - -static int cleanup_bgp_path_info_mpath_update(testcase_t *t) -{ - int i; - - for (i = 0; i < test_mp_list_peer_count; i++) - sockunion_free(test_mp_list_peer[i].su_remote); - - return bgp_delete((struct bgp *)t->tmp_data); -} - -testcase_t test_bgp_path_info_mpath_update = { - .desc = "Test bgp_path_info_mpath_update", - .setup = setup_bgp_path_info_mpath_update, - .run = run_bgp_path_info_mpath_update, - .cleanup = cleanup_bgp_path_info_mpath_update, -}; - -/*========================================================= - * Set up testcase vector - */ -testcase_t *all_tests[] = { - &test_bgp_cfg_maximum_paths, &test_bgp_mp_list, - &test_bgp_path_info_mpath_update, -}; - -int all_tests_count = array_size(all_tests); - -/*========================================================= - * Test Driver Functions - */ -static int global_test_init(void) -{ - qobj_init(); - master = event_master_create(NULL); - zclient = zclient_new(master, &zclient_options_default, NULL, 0); - bgp_master_init(master, BGP_SOCKET_SNDBUF_SIZE, list_new()); - vrf_init(NULL, NULL, NULL, NULL); - bgp_option_set(BGP_OPT_NO_LISTEN); - - if (fileno(stdout) >= 0) - tty = isatty(fileno(stdout)); - return 0; -} - -static int global_test_cleanup(void) -{ - if (zclient != NULL) - zclient_free(zclient); - event_master_free(master); - return 0; -} - -static void display_result(testcase_t *test, int result) -{ - if (tty) - printf("%s: %s\n", test->desc, - result == TEST_PASSED ? OK : FAILED); - else - printf("%s: %s\n", test->desc, - result == TEST_PASSED ? "OK" : "FAILED"); -} - -static int setup_test(testcase_t *t) -{ - int res = 0; - if (t->setup) - res = t->setup(t); - return res; -} - -static int cleanup_test(testcase_t *t) -{ - int res = 0; - if (t->cleanup) - res = t->cleanup(t); - return res; -} - -static void run_tests(testcase_t *tests[], int num_tests, int *pass_count, - int *fail_count) -{ - int test_index, result; - testcase_t *cur_test; - - *pass_count = *fail_count = 0; - - for (test_index = 0; test_index < num_tests; test_index++) { - cur_test = tests[test_index]; - if (!cur_test->desc) { - printf("error: test %d has no description!\n", - test_index); - continue; - } - if (!cur_test->run) { - printf("error: test %s has no run function!\n", - cur_test->desc); - continue; - } - if (setup_test(cur_test) != 0) { - printf("error: setup failed for test %s\n", - cur_test->desc); - continue; - } - result = cur_test->run(cur_test); - if (result == TEST_PASSED) - *pass_count += 1; - else - *fail_count += 1; - display_result(cur_test, result); - if (cleanup_test(cur_test) != 0) { - printf("error: cleanup failed for test %s\n", - cur_test->desc); - continue; - } - } -} - -int main(void) -{ - int pass_count, fail_count; - time_t cur_time; - char buf[32]; - - time(&cur_time); - printf("BGP Multipath Tests Run at %s", ctime_r(&cur_time, buf)); - if (global_test_init() != 0) { - printf("Global init failed. Terminating.\n"); - exit(1); - } - run_tests(all_tests, all_tests_count, &pass_count, &fail_count); - global_test_cleanup(); - printf("Total pass/fail: %d/%d\n", pass_count, fail_count); - return fail_count; -} diff --git a/tests/bgpd/test_mpath.py b/tests/bgpd/test_mpath.py deleted file mode 100644 index 582fd25c2024..000000000000 --- a/tests/bgpd/test_mpath.py +++ /dev/null @@ -1,10 +0,0 @@ -import frrtest - - -class TestMpath(frrtest.TestMultiOut): - program = "./test_mpath" - - -TestMpath.okfail("bgp maximum-paths config") -TestMpath.okfail("bgp_mp_list") -TestMpath.okfail("bgp_path_info_mpath_update")