Skip to content

Commit 6a75d33

Browse files
Soumya Roysoumyar-roy
Soumya Roy
authored andcommitted
zebra: Bring up 514 BGP neighbor sessions
Issue: When 514 interfaces/neighbors are configured, a socket error, "Cannot allocate memory", occurs when back-to-back V6 RA messages are sent over the socket. This prevents an interface from learning its peer's link-local address. The socket error occurs when we 1) try to join the ICMPv6 all-routers multicast group back to back for all interfaces, or 2) send back-to-back RAs for all interfaces. Fix: 1) For the ICMPv6 join case, we check whether the interface has already joined the all-routers group and, if not, try to join. On failure, we retry joining after a random delay between 1 ms and ICMPV6_JOIN_TIMER_EXP_MS (100 ms). 2) For the RA case, batch the sending of RA messages using a wheel timer. Testing: Monitor the BGP sessions by running the sh bgp summary command. Before fix: r1# sh bgp summary IPv4 Unicast Summary: BGP router identifier 192.168.1.1, local AS number 1001 VRF default vrf-id 0 BGP table version 0 RIB entries 0, using 0 bytes of memory Peers 515, using 12 MiB of memory Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd PfxSnt Desc r1-eth0 4 1002 89 90 0 0 0 00:07:10 0 0 N/A r1-eth1 4 1002 89 90 0 0 0 00:07:10 0 0 N/A r1-eth2 4 1002 89 90 0 0 0 00:07:10 0 0 N/A r1-eth3 4 1002 89 90 0 0 0 00:07:10 0 0 N/A r1-eth4 4 1002 89 90 0 0 0 00:07:10 0 0 N/A r1-eth5 4 1002 89 90 0 0 0 00:07:10 0 0 N/A …..<snip>... r1-eth252 4 1002 31 29 0 0 0 00:02:08 0 0 N/A r1-eth253 4 1002 31 29 0 0 0 00:02:08 0 0 N/A r1-eth254 4 1002 31 29 0 0 0 00:02:08 0 0 N/A r1-eth255 4 1002 31 29 0 0 0 00:02:08 0 0 N/A r1-eth256 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth257 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth258 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth259 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth260 4 0 0 0 0 0 0 never Idle 0 N/A ……..<snip>….. 
r1-eth511 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth512 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth513 4 0 0 0 0 0 0 never Idle 0 N/A r1-eth514 4 0 0 0 0 0 0 never Idle 0 N/A After fix: r1# show bgp summary IPv4 Unicast Summary: BGP router identifier 192.168.1.1, local AS number 1001 VRF default vrf-id 0 BGP table version 0 RIB entries 0, using 0 bytes of memory Peers 515, using 12 MiB of memory Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd PfxSnt Desc r1-eth0 4 1002 87 87 0 0 0 00:07:04 0 0 N/A r1-eth1 4 1002 87 87 0 0 0 00:07:04 0 0 N/A r1-eth2 4 1002 87 87 0 0 0 00:07:04 0 0 N/A r1-eth3 4 1002 64 67 0 0 0 00:05:09 0 0 N/A r1-eth4 4 1002 87 87 0 0 0 00:07:04 0 0 N/A r1-eth5 4 1002 87 87 0 0 0 00:07:04 0 0 N/A r1-eth6 4 1002 67 70 0 0 0 00:05:22 0 0 N/A r1-eth7 4 1002 87 87 0 0 0 00:07:04 0 0 N/A r1-eth8 4 1002 87 87 0 0 0 00:07:04 0 0 N/A .... r1-eth499 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth500 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth501 4 1002 19 22 0 0 0 00:01:21 0 0 N/A r1-eth502 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth503 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth504 4 1002 20 23 0 0 0 00:01:30 0 0 N/A r1-eth505 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth506 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth507 4 1002 22 25 0 0 0 00:01:39 0 0 N/A r1-eth508 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth509 4 1002 17 20 0 0 0 00:01:13 0 0 N/A r1-eth510 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth511 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth512 4 1002 19 22 0 0 0 00:01:22 0 0 N/A r1-eth513 4 1002 43 43 0 0 0 00:03:22 0 0 N/A r1-eth514 4 1002 43 43 0 0 0 00:03:22 0 0 N/A Signed-off-by: Soumya Roy <[email protected]>
1 parent 6c3e1e4 commit 6a75d33

File tree

6 files changed

+212
-5
lines changed

6 files changed

+212
-5
lines changed

tests/topotests/high_ecmp/test_high_ecmp.py

-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343

4444

4545
def build_topo(tgen):
46-
4746
tgen.add_router("r1")
4847
tgen.add_router("r2")
4948

zebra/interface.h

+3
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ struct zebra_if {
9999
/* back pointer to the interface */
100100
struct interface *ifp;
101101

102+
/* Event timer to batch ICMPv6 join requests */
103+
struct event *icmpv6_join_timer;
104+
102105
enum zebra_if_flags flags;
103106

104107
/* Shutdown configuration. */

zebra/rtadv.c

+183-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include "vrf.h"
2222
#include "ns.h"
2323
#include "lib_errors.h"
24+
#include "wheel.h"
25+
#include "network.h"
2426

2527
#include "zebra/interface.h"
2628
#include "zebra/rtadv.h"
@@ -36,6 +38,19 @@ extern struct zebra_privs_t zserv_privs;
3638
static uint32_t interfaces_configured_for_ra_from_bgp;
3739
#define RTADV_ADATA_SIZE 1024
3840

41+
#define PROC_IGMP6 "/proc/net/igmp6"
42+
43+
/* 32 hex chars
44+
* say for 2001:db8:85a3::8a2e:370:7334
45+
* hex string is 20010db885a3000000008a2e03707334,
46+
* which is 32 chars long
47+
*/
48+
#define MAX_V6ADDR_LEN 32
49+
50+
#define MAX_INTERFACE_NAME_LEN 25
51+
52+
#define MAX_CHARS_PER_LINE 1024
53+
3954
#if defined(HAVE_RTADV)
4055

4156
#include "zebra/rtadv_clippy.c"
@@ -58,6 +73,12 @@ DEFINE_MTYPE_STATIC(ZEBRA, ADV_IF, "Advertised Interface");
5873
#define ALLNODE "ff02::1"
5974
#define ALLROUTER "ff02::2"
6075

76+
static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in);
77+
78+
#ifdef __linux__
79+
static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr);
80+
#endif
81+
6182
/* adv list node */
6283
struct adv_if {
6384
char name[IFNAMSIZ];
@@ -462,6 +483,60 @@ static void rtadv_send_packet(int sock, struct interface *ifp,
462483
zif->ra_sent++;
463484
}
464485

486+
static void start_icmpv6_join_timer(struct event *thread)
487+
{
488+
struct interface *ifp = EVENT_ARG(thread);
489+
struct zebra_if *zif = ifp->info;
490+
struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(ifp);
491+
492+
if (if_join_all_router(zvrf->rtadv.sock, ifp)) {
493+
/*Wait random amount of time between 1 ms to ICMPV6_JOIN_TIMER_EXP_MS ms*/
494+
int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1;
495+
event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, ifp, random_ms,
496+
&zif->icmpv6_join_timer);
497+
}
498+
499+
if (IS_ZEBRA_DEBUG_EVENT)
500+
zlog_debug("Processing ICMPv6 join on interface %s(%s:%u)", ifp->name,
501+
ifp->vrf->name, ifp->ifindex);
502+
}
503+
504+
void process_rtadv(void *arg)
505+
{
506+
struct interface *ifp = arg;
507+
struct zebra_if *zif = ifp->info;
508+
struct zebra_vrf *zvrf = rtadv_interface_get_zvrf(ifp);
509+
510+
if (zif->rtadv.inFastRexmit && zif->rtadv.UseFastRexmit) {
511+
if (--zif->rtadv.NumFastReXmitsRemain <= 0)
512+
zif->rtadv.inFastRexmit = 0;
513+
514+
if (IS_ZEBRA_DEBUG_SEND)
515+
zlog_debug("Doing fast RA Rexmit on interface %s(%s:%u)", ifp->name,
516+
ifp->vrf->name, ifp->ifindex);
517+
518+
rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE);
519+
} else {
520+
zif->rtadv.AdvIntervalTimer -= RTADV_TIMER_WHEEL_PERIOD_MS;
521+
/* Wait atleast AdvIntervalTimer time before sending next RA
522+
* AdvIntervalTimer can go negative, when ra_wheel timer expiry
523+
* interval is not a multiple of AdvIntervalTimer. Say ra_wheel
524+
* expiry time is 10 ms and, AdvIntervalTimer == 1005 ms. Allowing
525+
* AdvIntervalTimer to go negative and checking, gurantees that
526+
* we have waited Wait atleast AdvIntervalTimer, so RA can be
527+
* sent now.
528+
*/
529+
if (zif->rtadv.AdvIntervalTimer <= 0) {
530+
zif->rtadv.AdvIntervalTimer = zif->rtadv.MaxRtrAdvInterval;
531+
if (IS_ZEBRA_DEBUG_SEND)
532+
zlog_debug("Doing regular RA Rexmit on interface %s(%s:%u)",
533+
ifp->name, ifp->vrf->name, ifp->ifindex);
534+
535+
rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_ENABLE);
536+
}
537+
}
538+
}
539+
465540
static void rtadv_timer(struct event *thread)
466541
{
467542
struct zebra_vrf *zvrf = EVENT_ARG(thread);
@@ -1261,7 +1336,13 @@ static void rtadv_start_interface_events(struct zebra_vrf *zvrf,
12611336
if (adv_if != NULL)
12621337
return; /* Already added */
12631338

1264-
if_join_all_router(zvrf->rtadv.sock, zif->ifp);
1339+
if (if_join_all_router(zvrf->rtadv.sock, zif->ifp)) {
1340+
/*Failed to join on 1st attempt, wait random amount of time between 1 ms
1341+
to ICMPV6_JOIN_TIMER_EXP_MS ms*/
1342+
int random_ms = (frr_weak_random() % ICMPV6_JOIN_TIMER_EXP_MS) + 1;
1343+
event_add_timer_msec(zrouter.master, start_icmpv6_join_timer, zif->ifp, random_ms,
1344+
&zif->icmpv6_join_timer);
1345+
}
12651346

12661347
if (adv_if_list_count(&zvrf->rtadv.adv_if) == 1)
12671348
rtadv_event(zvrf, RTADV_START, 0);
@@ -1281,6 +1362,8 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp,
12811362
if (status == RA_SUPPRESS) {
12821363
/* RA is currently enabled */
12831364
if (zif->rtadv.AdvSendAdvertisements) {
1365+
/* Try to delete from the ra wheel */
1366+
wheel_remove_item(zrouter.ra_wheel, ifp);
12841367
rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS);
12851368
zif->rtadv.AdvSendAdvertisements = 0;
12861369
zif->rtadv.AdvIntervalTimer = 0;
@@ -1311,6 +1394,7 @@ void ipv6_nd_suppress_ra_set(struct interface *ifp,
13111394
RTADV_NUM_FAST_REXMITS;
13121395
}
13131396

1397+
wheel_add_item(zrouter.ra_wheel, ifp);
13141398
rtadv_start_interface_events(zvrf, zif);
13151399
}
13161400
}
@@ -1438,6 +1522,12 @@ void rtadv_stop_ra(struct interface *ifp)
14381522
zif = ifp->info;
14391523
zvrf = rtadv_interface_get_zvrf(ifp);
14401524

1525+
/*Try to delete from ra wheels */
1526+
wheel_remove_item(zrouter.ra_wheel, ifp);
1527+
1528+
/*Turn off event for ICMPv6 join*/
1529+
EVENT_OFF(zif->icmpv6_join_timer);
1530+
14411531
if (zif->rtadv.AdvSendAdvertisements)
14421532
rtadv_send_packet(zvrf->rtadv.sock, ifp, RA_SUPPRESS);
14431533
}
@@ -1730,8 +1820,7 @@ static void rtadv_event(struct zebra_vrf *zvrf, enum rtadv_event event, int val)
17301820
case RTADV_START:
17311821
event_add_read(zrouter.master, rtadv_read, zvrf, rtadv->sock,
17321822
&rtadv->ra_read);
1733-
event_add_event(zrouter.master, rtadv_timer, zvrf, 0,
1734-
&rtadv->ra_timer);
1823+
17351824
break;
17361825
case RTADV_STOP:
17371826
EVENT_OFF(rtadv->ra_timer);
@@ -1862,24 +1951,114 @@ void rtadv_cmd_init(void)
18621951
install_element(VIEW_NODE, &show_ipv6_nd_ra_if_cmd);
18631952
}
18641953

1954+
#ifdef __linux__
1955+
static bool v6_addr_hex_str_to_in6_addr(const char *hex_str, struct in6_addr *addr)
1956+
{
1957+
size_t str_len = strlen(hex_str);
1958+
1959+
if (str_len != MAX_V6ADDR_LEN) {
1960+
flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid V6 addr hex len %zu", str_len);
1961+
return false;
1962+
}
1963+
1964+
for (int i = 0; i < 16; i++) {
1965+
char byte_str[3] = { hex_str[i * 2], hex_str[i * 2 + 1], '\0' };
1966+
addr->s6_addr[i] = (uint8_t)strtol(byte_str, NULL, 16);
1967+
}
1968+
1969+
return true;
1970+
}
1971+
#endif
1972+
1973+
/* Checks if an interface is part of a multicast group, no null check for input strings */
1974+
static bool is_interface_in_group(const char *ifname_in, const char *mcast_addr_in)
1975+
{
1976+
#ifdef __linux__
1977+
char line[MAX_CHARS_PER_LINE];
1978+
char ifname_found[MAX_INTERFACE_NAME_LEN];
1979+
char mcast_addr_found_hex_str[MAX_V6ADDR_LEN + 5];
1980+
struct in6_addr mcast_addr_in_bin;
1981+
struct in6_addr mcast_addr_found_bin;
1982+
int if_index = -1;
1983+
int ifname_in_len = 0;
1984+
int ifname_found_len = 0;
1985+
1986+
FILE *fp = fopen(PROC_IGMP6, "r");
1987+
1988+
if (!fp) {
1989+
flog_err_sys(EC_LIB_SYSTEM_CALL, "Failed to open %s", PROC_IGMP6);
1990+
return false;
1991+
}
1992+
1993+
/* Convert input IPv6 address to binary */
1994+
if (inet_pton(AF_INET6, mcast_addr_in, &mcast_addr_in_bin) != 1) {
1995+
flog_err_sys(EC_LIB_SYSTEM_CALL, "Invalid IPv6 address format %s", mcast_addr_in);
1996+
fclose(fp);
1997+
return false;
1998+
}
1999+
2000+
/* Convert binary to hex format */
2001+
while (fgets(line, sizeof(line), fp)) {
2002+
sscanf(line, "%d %s %s", &if_index, ifname_found, mcast_addr_found_hex_str);
2003+
2004+
ifname_in_len = strlen(ifname_in);
2005+
ifname_found_len = strlen(ifname_found);
2006+
if (ifname_in_len != ifname_found_len)
2007+
continue;
2008+
2009+
/* Locate 'x' if "0x" is present or not, if present go past that */
2010+
const char *clean_mcast_addr_hex_str = strchr(mcast_addr_found_hex_str, 'x');
2011+
if (clean_mcast_addr_hex_str) {
2012+
clean_mcast_addr_hex_str++;
2013+
} else {
2014+
clean_mcast_addr_hex_str = mcast_addr_found_hex_str;
2015+
}
2016+
2017+
if (!v6_addr_hex_str_to_in6_addr(clean_mcast_addr_hex_str, &mcast_addr_found_bin))
2018+
continue;
2019+
2020+
if ((!strncmp(ifname_in, ifname_found, ifname_in_len)) &&
2021+
(!IPV6_ADDR_CMP(&mcast_addr_in_bin, &mcast_addr_found_bin))) {
2022+
fclose(fp);
2023+
/* Already joined */
2024+
return true;
2025+
}
2026+
}
2027+
2028+
fclose(fp);
2029+
2030+
#endif
2031+
2032+
/* Not joined */
2033+
return false;
2034+
}
2035+
18652036
static int if_join_all_router(int sock, struct interface *ifp)
18662037
{
18672038
int ret;
18682039

18692040
struct ipv6_mreq mreq;
18702041

2042+
if (is_interface_in_group(ifp->name, ALLROUTER))
2043+
/* Interface is already part of the group, so return success */
2044+
return 0;
2045+
18712046
memset(&mreq, 0, sizeof(mreq));
18722047
inet_pton(AF_INET6, ALLROUTER, &mreq.ipv6mr_multiaddr);
18732048
mreq.ipv6mr_interface = ifp->ifindex;
18742049

18752050
ret = setsockopt(sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, (char *)&mreq,
18762051
sizeof(mreq));
1877-
if (ret < 0)
2052+
2053+
if (ret < 0) {
18782054
flog_err_sys(EC_LIB_SOCKET,
18792055
"%s(%u): Failed to join group, socket %u error %s",
18802056
ifp->name, ifp->ifindex, sock,
18812057
safe_strerror(errno));
18822058

2059+
return ret;
2060+
}
2061+
18832062
if (IS_ZEBRA_DEBUG_EVENT)
18842063
zlog_debug(
18852064
"%s(%s:%u): Join All-Routers multicast group, socket %u",

zebra/rtadv.h

+1
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@ extern void zebra_interface_radv_enable(ZAPI_HANDLER_ARGS);
460460
extern uint32_t rtadv_get_interfaces_configured_from_bgp(void);
461461
extern bool rtadv_compiled_in(void);
462462
extern void rtadv_init(void);
463+
extern void process_rtadv(void *arg);
463464

464465
#ifdef __cplusplus
465466
}

zebra/zebra_router.c

+18
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "zebra/zebra_tc.h"
1818
#include "debug.h"
1919
#include "zebra_script.h"
20+
#include "wheel.h"
2021

2122
DEFINE_MTYPE_STATIC(ZEBRA, RIB_TABLE_INFO, "RIB table info");
2223
DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_RT_TABLE, "Zebra VRF table");
@@ -220,10 +221,22 @@ uint32_t zebra_router_get_next_sequence(void)
220221
memory_order_relaxed);
221222
}
222223

224+
static inline unsigned int interface_hash_key(const void *arg)
225+
{
226+
const struct interface *ifp = arg;
227+
228+
return ifp->ifindex;
229+
}
230+
223231
void zebra_router_terminate(void)
224232
{
225233
struct zebra_router_table *zrt, *tmp;
226234

235+
if (zrouter.ra_wheel) {
236+
wheel_delete(zrouter.ra_wheel);
237+
zrouter.ra_wheel = NULL;
238+
}
239+
227240
EVENT_OFF(zrouter.t_rib_sweep);
228241

229242
RB_FOREACH_SAFE (zrt, zebra_router_table_head, &zrouter.tables, tmp)
@@ -278,6 +291,11 @@ void zebra_router_init(bool asic_offload, bool notify_on_ack,
278291

279292
zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER;
280293

294+
/*Init V6 RA batching stuffs*/
295+
zrouter.ra_wheel = wheel_init(zrouter.master, RTADV_TIMER_WHEEL_PERIOD_MS,
296+
RTADV_TIMER_WHEEL_SLOTS_NO, interface_hash_key, process_rtadv,
297+
NULL);
298+
281299
zebra_vxlan_init();
282300
zebra_mlag_init();
283301
zebra_neigh_init();

zebra/zebra_router.h

+7
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,19 @@ struct zebra_mlag_info {
112112
struct event *t_write;
113113
};
114114

115+
#define RTADV_TIMER_WHEEL_PERIOD_MS 1000
116+
#define RTADV_TIMER_WHEEL_SLOTS_NO 100
117+
#define ICMPV6_JOIN_TIMER_EXP_MS 100
118+
115119
struct zebra_router {
116120
atomic_bool in_shutdown;
117121

118122
/* Thread master */
119123
struct event_loop *master;
120124

125+
/* Wheel to process V6 RA update */
126+
struct timer_wheel *ra_wheel;
127+
121128
/* Lists of clients who have connected to us */
122129
struct list *client_list;
123130

0 commit comments

Comments
 (0)