Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizations and problem fixing for large scale ecmp from bgp #17229

Merged
merged 5 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions bgpd/bgp_aspath.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ static struct aspath *aspath_new(enum asnotation_mode asnotation)

as = XCALLOC(MTYPE_AS_PATH, sizeof(struct aspath));
as->asnotation = asnotation;
as->count = 0;

return as;
}

Expand Down Expand Up @@ -399,6 +401,11 @@ unsigned int aspath_count_confeds(struct aspath *aspath)
}

unsigned int aspath_count_hops(const struct aspath *aspath)
{
return aspath->count;
}

static unsigned int aspath_count_hops_internal(const struct aspath *aspath)
{
int count = 0;
struct assegment *seg = aspath->segments;
Expand Down Expand Up @@ -708,6 +715,7 @@ struct aspath *aspath_dup(struct aspath *aspath)
else
new->str[0] = '\0';

new->count = aspath->count;
return new;
}

Expand All @@ -729,6 +737,7 @@ static void *aspath_hash_alloc(void *arg)
new->str_len = aspath->str_len;
new->json = aspath->json;
new->asnotation = aspath->asnotation;
new->count = aspath->count;

return new;
}
Expand Down Expand Up @@ -856,6 +865,8 @@ struct aspath *aspath_parse(struct stream *s, size_t length, int use32bit,
if (assegments_parse(s, length, &as.segments, use32bit) < 0)
return NULL;

as.count = aspath_count_hops_internal(&as);

/* If already same aspath exist then return it. */
find = hash_get(ashash, &as, aspath_hash_alloc);

Expand Down Expand Up @@ -1032,7 +1043,7 @@ static struct assegment *aspath_aggregate_as_set_add(struct aspath *aspath,
asset->as[asset->length - 1] = as;
}


aspath->count = aspath_count_hops_internal(aspath);
return asset;
}

Expand Down Expand Up @@ -1113,6 +1124,8 @@ struct aspath *aspath_aggregate(struct aspath *as1, struct aspath *as2)

assegment_normalise(aspath->segments);
aspath_str_update(aspath, false);
aspath->count = aspath_count_hops_internal(aspath);

return aspath;
}

Expand Down Expand Up @@ -1268,6 +1281,7 @@ struct aspath *aspath_replace_regex_asn(struct aspath *aspath,
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);
return new;
}

Expand All @@ -1293,6 +1307,8 @@ struct aspath *aspath_replace_specific_asn(struct aspath *aspath,
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);

return new;
}

Expand All @@ -1315,6 +1331,8 @@ struct aspath *aspath_replace_all_asn(struct aspath *aspath, as_t our_asn)
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);

return new;
}

Expand All @@ -1341,6 +1359,8 @@ struct aspath *aspath_replace_private_asns(struct aspath *aspath, as_t asn,
}

aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);

return new;
}

Expand Down Expand Up @@ -1413,6 +1433,7 @@ struct aspath *aspath_remove_private_asns(struct aspath *aspath, as_t peer_asn)
if (!aspath->refcnt)
aspath_free(aspath);
aspath_str_update(new, false);
new->count = aspath_count_hops_internal(new);
return new;
}

Expand Down Expand Up @@ -1469,6 +1490,7 @@ static struct aspath *aspath_merge(struct aspath *as1, struct aspath *as2)
last->next = as2->segments;
as2->segments = new;
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
}

Expand All @@ -1486,6 +1508,7 @@ struct aspath *aspath_prepend(struct aspath *as1, struct aspath *as2)
if (as2->segments == NULL) {
as2->segments = assegment_dup_all(as1->segments);
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
}

Expand All @@ -1506,6 +1529,7 @@ struct aspath *aspath_prepend(struct aspath *as1, struct aspath *as2)
if (!as2->segments) {
as2->segments = assegment_dup_all(as1->segments);
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
}

Expand Down Expand Up @@ -1551,6 +1575,7 @@ struct aspath *aspath_prepend(struct aspath *as1, struct aspath *as2)
* the inbetween AS_SEQUENCE of seg2 in the process
*/
aspath_str_update(as2, false);
as2->count = aspath_count_hops_internal(as2);
return as2;
} else {
/* AS_SET merge code is needed at here. */
Expand Down Expand Up @@ -1662,6 +1687,7 @@ struct aspath *aspath_filter_exclude(struct aspath *source,
lastseg = newseg;
}
aspath_str_update(newpath, false);
newpath->count = aspath_count_hops_internal(newpath);
/* We are happy returning even an empty AS_PATH, because the
* administrator
* might expect this very behaviour. There's a mean to avoid this, if
Expand All @@ -1680,6 +1706,7 @@ struct aspath *aspath_filter_exclude_all(struct aspath *source)
newpath = aspath_new(source->asnotation);

aspath_str_update(newpath, false);
newpath->count = aspath_count_hops_internal(newpath);
/* We are happy returning even an empty AS_PATH, because the
* administrator
* might expect this very behaviour. There's a mean to avoid this, if
Expand Down Expand Up @@ -1767,6 +1794,7 @@ struct aspath *aspath_filter_exclude_acl(struct aspath *source,


aspath_str_update(source, false);
source->count = aspath_count_hops_internal(source);
/* We are happy returning even an empty AS_PATH, because the
* administrator
* might expect this very behaviour. There's a mean to avoid this, if
Expand Down Expand Up @@ -1805,6 +1833,7 @@ static struct aspath *aspath_add_asns(struct aspath *aspath, as_t asno,
}

aspath_str_update(aspath, false);
aspath->count = aspath_count_hops_internal(aspath);
return aspath;
}

Expand Down Expand Up @@ -1896,6 +1925,7 @@ struct aspath *aspath_reconcile_as4(struct aspath *aspath,
if (!hops) {
newpath = aspath_dup(as4path);
aspath_str_update(newpath, false);
/* dup sets the count properly */
return newpath;
}

Expand Down Expand Up @@ -1957,6 +1987,7 @@ struct aspath *aspath_reconcile_as4(struct aspath *aspath,
aspath_free(newpath);
mergedpath->segments = assegment_normalise(mergedpath->segments);
aspath_str_update(mergedpath, false);
mergedpath->count = aspath_count_hops_internal(mergedpath);

if (BGP_DEBUG(as4, AS4))
zlog_debug("[AS4] result of synthesizing is %s",
Expand Down Expand Up @@ -2027,8 +2058,10 @@ struct aspath *aspath_delete_confed_seq(struct aspath *aspath)
seg = next;
}

if (removed_confed_segment)
if (removed_confed_segment) {
aspath_str_update(aspath, false);
aspath->count = aspath_count_hops_internal(aspath);
}

return aspath;
}
Expand Down
1 change: 1 addition & 0 deletions bgpd/bgp_aspath.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct aspath {
and AS path regular expression match. */
char *str;
unsigned short str_len;
uint32_t count;

/* AS notation used by string expression of AS path */
enum asnotation_mode asnotation;
Expand Down
10 changes: 10 additions & 0 deletions bgpd/bgp_fsm.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
EVENT_OFF(going_away->t_delayopen);
EVENT_OFF(going_away->t_connect_check_r);
EVENT_OFF(going_away->t_connect_check_w);
EVENT_OFF(going_away->t_stop_with_notify);
EVENT_OFF(keeper->t_routeadv);
EVENT_OFF(keeper->t_connect);
EVENT_OFF(keeper->t_delayopen);
Expand Down Expand Up @@ -1475,6 +1476,8 @@ enum bgp_fsm_state_progress bgp_stop(struct peer_connection *connection)
EVENT_OFF(connection->t_connect_check_r);
EVENT_OFF(connection->t_connect_check_w);

EVENT_OFF(connection->t_stop_with_notify);

/* Stop all timers. */
EVENT_OFF(connection->t_start);
EVENT_OFF(connection->t_connect);
Expand Down Expand Up @@ -3032,3 +3035,10 @@ void bgp_peer_gr_flags_update(struct peer *peer)
}
}
}

void bgp_event_stop_with_notify(struct event *event)
{
struct peer_connection *connection = EVENT_ARG(event);

bgp_stop_with_notify(connection, BGP_NOTIFY_SEND_HOLD_ERR, 0);
}
1 change: 1 addition & 0 deletions bgpd/bgp_fsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ enum bgp_fsm_state_progress {
extern void bgp_fsm_nht_update(struct peer_connection *connection,
struct peer *peer, bool has_valid_nexthops);
extern void bgp_event(struct event *event);
extern void bgp_event_stop_with_notify(struct event *event);
extern int bgp_event_update(struct peer_connection *connection,
enum bgp_fsm_events event);
extern enum bgp_fsm_state_progress bgp_stop(struct peer_connection *connection);
Expand Down
60 changes: 28 additions & 32 deletions bgpd/bgp_packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,42 +122,38 @@ static void bgp_packet_add(struct peer_connection *connection,
peer->last_sendq_ok = monotime(NULL);

stream_fifo_push(connection->obuf, s);
}

delta = monotime(NULL) - peer->last_sendq_ok;
delta = monotime(NULL) - peer->last_sendq_ok;

if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
holdtime = atomic_load_explicit(&peer->holdtime,
memory_order_relaxed);
else
holdtime = peer->bgp->default_holdtime;
if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
holdtime = atomic_load_explicit(&peer->holdtime, memory_order_relaxed);
else
holdtime = peer->bgp->default_holdtime;

sendholdtime = holdtime * 2;
sendholdtime = holdtime * 2;

/* Note that when we're here, we're adding some packet to the
* OutQ. That includes keepalives when there is nothing to
* do, so there's a guarantee we pass by here once in a while.
*
* That implies there is no need to go set up another separate
* timer that ticks down SendHoldTime, as we'll be here sooner
* or later anyway and will see the checks below failing.
*/
if (!holdtime) {
/* no holdtime, do nothing. */
} else if (delta > sendholdtime) {
flog_err(
EC_BGP_SENDQ_STUCK_PROPER,
"%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
peer, sendholdtime);
bgp_stop_with_notify(connection,
BGP_NOTIFY_SEND_HOLD_ERR, 0);
} else if (delta > (intmax_t)holdtime &&
monotime(NULL) - peer->last_sendq_warn > 5) {
flog_warn(
EC_BGP_SENDQ_STUCK_WARN,
"%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
peer, holdtime);
peer->last_sendq_warn = monotime(NULL);
}
/* Note that when we're here, we're adding some packet to the
* OutQ. That includes keepalives when there is nothing to
* do, so there's a guarantee we pass by here once in a while.
*
* That implies there is no need to go set up another separate
* timer that ticks down SendHoldTime, as we'll be here sooner
* or later anyway and will see the checks below failing.
*/
if (!holdtime) {
/* no holdtime, do nothing. */
} else if (delta > sendholdtime) {
flog_err(EC_BGP_SENDQ_STUCK_PROPER,
"%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
peer, sendholdtime);
event_add_event(bm->master, bgp_event_stop_with_notify, connection, 0,
&connection->t_stop_with_notify);
} else if (delta > (intmax_t)holdtime && monotime(NULL) - peer->last_sendq_warn > 5) {
flog_warn(EC_BGP_SENDQ_STUCK_WARN,
"%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
peer, holdtime);
peer->last_sendq_warn = monotime(NULL);
}
}

Expand Down
22 changes: 13 additions & 9 deletions bgpd/bgp_route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1133,9 +1133,9 @@ int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
/* 4. AS path length check. */
if (!CHECK_FLAG(bgp->flags, BGP_FLAG_ASPATH_IGNORE)) {
int exist_hops = aspath_count_hops(existattr->aspath);
int exist_confeds = aspath_count_confeds(existattr->aspath);

if (CHECK_FLAG(bgp->flags, BGP_FLAG_ASPATH_CONFED)) {
int exist_confeds = aspath_count_confeds(existattr->aspath);
int aspath_hops;

aspath_hops = aspath_count_hops(newattr->aspath);
Expand Down Expand Up @@ -4676,10 +4676,12 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
* will not be interned. In which case, it is ok to update the
* attr->evpn_overlay, so that, this can be stored in adj_in.
*/
if ((afi == AFI_L2VPN) && evpn)
bgp_attr_set_evpn_overlay(attr, evpn);
else
evpn_overlay_free(evpn);
if (evpn) {
if (afi == AFI_L2VPN)
bgp_attr_set_evpn_overlay(attr, evpn);
else
evpn_overlay_free(evpn);
}
bgp_adj_in_set(dest, peer, attr, addpath_id, &bgp_labels);
}

Expand Down Expand Up @@ -4855,10 +4857,12 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
* attr->evpn_overlay with evpn directly. Instead memcpy
* evpn to new_atr.evpn_overlay before it is interned.
*/
if (soft_reconfig && (afi == AFI_L2VPN) && evpn)
bgp_attr_set_evpn_overlay(&new_attr, evpn);
else
evpn_overlay_free(evpn);
if (soft_reconfig && evpn) {
if (afi == AFI_L2VPN)
bgp_attr_set_evpn_overlay(&new_attr, evpn);
else
evpn_overlay_free(evpn);
}

/* Apply incoming route-map.
* NB: new_attr may now contain newly allocated values from route-map
Expand Down
2 changes: 2 additions & 0 deletions bgpd/bgpd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,8 @@ struct peer_connection {
struct event *t_process_packet;
struct event *t_process_packet_error;

struct event *t_stop_with_notify;

union sockunion su;
#define BGP_CONNECTION_SU_UNSPEC(connection) \
(connection->su.sa.sa_family == AF_UNSPEC)
Expand Down
2 changes: 1 addition & 1 deletion zebra/kernel_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
} while (status == -1 && errno == EINTR);

if (status == -1) {
if (errno == EWOULDBLOCK || errno == EAGAIN)
if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EMSGSIZE)
return 0;
flog_err(EC_ZEBRA_RECVMSG_OVERRUN, "%s recvmsg overrun: %s",
nl->name, safe_strerror(errno));
Expand Down
Loading