Commit 12bf042

bgpd: Modify bgp to handle packet events in a FIFO
Current behavior of BGP is to have an event per connection. On startup with a high number of neighbors, BGP ends up with 2 * (number of peers) events being processed. Even once BGP has selected the connection for each peer, that still comes to 512 events in this setup. This number of events swamps the event system and delays any other work in BGP, because the 512 events always take precedence over everything else. The other main events, the metaQ handler (1 event), update group events (1 per update group), and the zebra batching event, are being starved.

Modify the BGP code to keep a FIFO of connections. As new data comes in to read, place the connection on the end of the FIFO. Have bgp_process_packet handle up to 100 packets spread across the individual peers, where each peer/connection is limited to the original quanta. During testing I noticed that withdrawal events at very large scale were taking up to 40 seconds to process, so I added a check for yielding to further limit the number of packets being processed.

This change also allows BGP to be interactive again on scale setups during initial convergence. Prior to this change, any vtysh command entered would be delayed by tens of seconds in my setup while BGP was doing other work.

Signed-off-by: Donald Sharp <[email protected]>
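To make the new scheduling concrete, here is a minimal, standalone sketch of the pattern the commit adopts: a single work event drains a FIFO of connections, bounded by a per-connection quanta and a global per-run packet limit, re-queuing any connection that still has work. Everything here (struct conn, fifo_push/fifo_pop, drain_fifo, the constants) is a hypothetical simplification for illustration, not FRR code.

#include <stdbool.h>
#include <stdio.h>

#define QUANTA 10     /* max packets per connection per run (the rpkt quanta) */
#define RUN_LIMIT 100 /* max packets per event run (BGP_PACKET_PROCESS_LIMIT) */

struct conn {
	int id;
	int pending;       /* packets waiting in this connection's input buffer */
	struct conn *next; /* intrusive FIFO linkage */
};

static struct conn *fifo_head, *fifo_tail;

static void fifo_push(struct conn *c)
{
	c->next = NULL;
	if (fifo_tail)
		fifo_tail->next = c;
	else
		fifo_head = c;
	fifo_tail = c;
}

static struct conn *fifo_pop(void)
{
	struct conn *c = fifo_head;

	if (c) {
		fifo_head = c->next;
		if (!fifo_head)
			fifo_tail = NULL;
	}
	return c;
}

/* One "event run": round-robin the FIFO, bounded by QUANTA per connection
 * and RUN_LIMIT overall.  Returns true if work remains, i.e. the event
 * should be re-armed. */
static bool drain_fifo(void)
{
	int processed = 0;
	struct conn *c;

	while (processed < RUN_LIMIT && (c = fifo_pop())) {
		int burst = c->pending < QUANTA ? c->pending : QUANTA;

		c->pending -= burst;
		processed += burst;
		printf("conn %d: processed %d, %d left\n", c->id, burst,
		       c->pending);
		if (c->pending > 0)
			fifo_push(c); /* back of the line for fairness */
	}
	return fifo_head != NULL;
}

int main(void)
{
	struct conn conns[4] = { { 1, 25 }, { 2, 3 }, { 3, 80 }, { 4, 40 } };

	for (int i = 0; i < 4; i++)
		fifo_push(&conns[i]);
	while (drain_fifo())
		; /* re-run until empty */
	return 0;
}

In the sketch, calling drain_fifo() again stands in for re-adding the event; in the commit that is the event_add_event(..., &bm->e_process_packet) call at the done: label in bgp_process_packet.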
1 parent f379064 commit 12bf042

File tree

7 files changed: +158 -29 lines


bgpd/bgp_fsm.c

Lines changed: 12 additions & 3 deletions
@@ -184,7 +184,11 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
 	EVENT_OFF(keeper->t_delayopen);
 	EVENT_OFF(keeper->t_connect_check_r);
 	EVENT_OFF(keeper->t_connect_check_w);
-	EVENT_OFF(keeper->t_process_packet);
+
+	frr_with_mutex (&bm->peer_connection_mtx) {
+		if (peer_connection_fifo_member(&bm->connection_fifo, keeper))
+			peer_connection_fifo_del(&bm->connection_fifo, keeper);
+	}
 
 	/*
 	 * At this point in time, it is possible that there are packets pending
@@ -305,8 +309,13 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
 
 	bgp_reads_on(keeper);
 	bgp_writes_on(keeper);
-	event_add_event(bm->master, bgp_process_packet, keeper, 0,
-			&keeper->t_process_packet);
+
+	frr_with_mutex (&bm->peer_connection_mtx) {
+		if (!peer_connection_fifo_member(&bm->connection_fifo, keeper)) {
+			peer_connection_fifo_add_tail(&bm->connection_fifo, keeper);
+		}
+	}
+	event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
 
 	return (peer);
 }

bgpd/bgp_io.c

Lines changed: 12 additions & 4 deletions
@@ -99,7 +99,11 @@ void bgp_reads_off(struct peer_connection *connection)
 	assert(fpt->running);
 
 	event_cancel_async(fpt->master, &connection->t_read, NULL);
-	EVENT_OFF(connection->t_process_packet);
+
+	frr_with_mutex (&bm->peer_connection_mtx) {
+		if (peer_connection_fifo_member(&bm->connection_fifo, connection))
+			peer_connection_fifo_del(&bm->connection_fifo, connection);
+	}
 
 	UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
 }
@@ -292,9 +296,13 @@ static void bgp_process_reads(struct event *thread)
 
 	event_add_read(fpt->master, bgp_process_reads, connection,
 		       connection->fd, &connection->t_read);
-	if (added_pkt)
-		event_add_event(bm->master, bgp_process_packet, connection, 0,
-				&connection->t_process_packet);
+	if (added_pkt) {
+		frr_with_mutex (&bm->peer_connection_mtx) {
+			if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
+				peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
+		}
+		event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
+	}
 }
 
 /*
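bgp_process_reads() runs on the io pthread while bgp_process_packet() runs on the main pthread, which is why every FIFO touch above sits under bm->peer_connection_mtx, and why the membership test makes enqueueing idempotent. Below is a self-contained sketch of just that producer-side pattern; struct mini_conn, enqueue_for_processing(), and the queued flag are hypothetical stand-ins for the typesafe FIFO, not FRR code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct mini_conn {
	int id;
	bool queued; /* stand-in for peer_connection_fifo_member() */
	struct mini_conn *next;
};

static pthread_mutex_t fifo_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct mini_conn *head, *tail;

/* Called from the read side whenever a full packet lands in ibuf. */
static void enqueue_for_processing(struct mini_conn *c)
{
	pthread_mutex_lock(&fifo_mtx);
	if (!c->queued) { /* idempotent: never on the FIFO twice */
		c->queued = true;
		c->next = NULL;
		if (tail)
			tail->next = c;
		else
			head = c;
		tail = c;
	}
	pthread_mutex_unlock(&fifo_mtx);
	/* bgpd then arms the single shared event:
	 * event_add_event(bm->master, bgp_process_packet, NULL, 0,
	 *                 &bm->e_process_packet); */
}

int main(void)
{
	struct mini_conn c = { .id = 1 };

	enqueue_for_processing(&c);
	enqueue_for_processing(&c); /* second call is a no-op */
	printf("queued once: %s\n",
	       head == &c && head->next == NULL ? "yes" : "no");
	return 0;
}

Because processing is keyed by the single &bm->e_process_packet reference rather than a per-connection event, re-arming it while it is already scheduled is a no-op in FRR's event system, so bursts of reads collapse into one pending run.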

bgpd/bgp_io.h

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 
 #define BGP_WRITE_PACKET_MAX 64U
 #define BGP_READ_PACKET_MAX 10U
+#define BGP_PACKET_PROCESS_LIMIT 100
 
 #include "bgpd/bgpd.h"
 #include "frr_pthread.h"
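Worked out, these constants bound one run of bgp_process_packet: assuming the default input quanta of 10 (rpkt_quanta, which defaults to BGP_READ_PACKET_MAX), a run of 100 packets services at most 100 / 10 = 10 connections at full quanta before the FIFO round-robin resumes in a later event run, instead of hundreds of per-connection events each running a full quanta back to back.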

bgpd/bgp_main.c

Lines changed: 8 additions & 0 deletions
@@ -161,6 +161,14 @@ __attribute__((__noreturn__)) void sigint(void)
 
 	bgp_exit(0);
 
+	/*
+	 * This is being done after bgp_exit because items may be removed
+	 * from the connection_fifo
+	 */
+	peer_connection_fifo_fini(&bm->connection_fifo);
+	EVENT_OFF(bm->e_process_packet);
+	pthread_mutex_destroy(&bm->peer_connection_mtx);
+
 	exit(0);
 }

bgpd/bgp_packet.c

Lines changed: 106 additions & 21 deletions
@@ -3967,6 +3967,18 @@ int bgp_capability_receive(struct peer_connection *connection,
  * would not, making event flow difficult to understand. Please think twice
  * before hacking this.
  *
+ * packet_processing is now a FIFO of connections that need to be handled
+ * This loop has a maximum run of 100(BGP_PACKET_PROCESS_LIMIT) packets,
+ * but each individual connection can only handle the quanta value as
+ * specified in bgp_vty.c. If the connection still has work to do, place it
+ * back on the back of the queue for more work. Do note that event_should_yield
+ * is also being called to figure out if processing should stop and work
+ * picked up after other items can run. This was added *After* withdrawals
+ * started being processed at scale and this function was taking cpu for 40+ seconds
+ * On my machine we are getting 2-3 packets before a yield should happen in the
+ * update case. Withdrawal is 1 packet being processed(note this is a very very
+ * fast computer) before other items should be run.
+ *
  * Thread type: EVENT_EVENT
  * @param thread
  * @return 0
@@ -3979,30 +3991,54 @@ void bgp_process_packet(struct event *thread)
 	uint32_t rpkt_quanta_old; // how many packets to read
 	int fsm_update_result;    // return code of bgp_event_update()
 	int mprc;                 // message processing return code
+	uint32_t processed = 0, curr_connection_processed = 0;
+	bool more_work = false;
+	size_t count;
+	uint32_t total_packets_to_process, total_processed = 0;
+
+	frr_with_mutex (&bm->peer_connection_mtx)
+		connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+	if (!connection)
+		goto done;
 
-	connection = EVENT_ARG(thread);
+	total_packets_to_process = BGP_PACKET_PROCESS_LIMIT;
 	peer = connection->peer;
 	rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
 					       memory_order_relaxed);
+
 	fsm_update_result = 0;
 
-	/* Guard against scheduled events that occur after peer deletion. */
-	if (connection->status == Deleted || connection->status == Clearing)
-		return;
+	while ((processed < total_packets_to_process) && connection) {
+		total_processed++;
+		/* Guard against scheduled events that occur after peer deletion. */
+		if (connection->status == Deleted || connection->status == Clearing) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
 
-	unsigned int processed = 0;
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
 
-	while (processed < rpkt_quanta_old) {
 		uint8_t type = 0;
 		bgp_size_t size;
 		char notify_data_length[2];
 
-		frr_with_mutex (&connection->io_mtx) {
+		frr_with_mutex (&connection->io_mtx)
 			peer->curr = stream_fifo_pop(connection->ibuf);
-		}
 
-		if (peer->curr == NULL) // no packets to process, hmm...
-			return;
+		if (peer->curr == NULL) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
 
 		/* skip the marker and copy the packet length */
 		stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
@@ -4106,32 +4142,81 @@ void bgp_process_packet(struct event *thread)
 		stream_free(peer->curr);
 		peer->curr = NULL;
 		processed++;
+		curr_connection_processed++;
 
 		/* Update FSM */
 		if (mprc != BGP_PACKET_NOOP)
 			fsm_update_result = bgp_event_update(connection, mprc);
-		else
-			continue;
 
 		/*
 		 * If peer was deleted, do not process any more packets. This
 		 * is usually due to executing BGP_Stop or a stub deletion.
 		 */
-		if (fsm_update_result == FSM_PEER_TRANSFERRED
-		    || fsm_update_result == FSM_PEER_STOPPED)
-			break;
+		if (fsm_update_result == FSM_PEER_TRANSFERRED ||
+		    fsm_update_result == FSM_PEER_STOPPED) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
+
+		bool yield = event_should_yield(thread);
+		if (curr_connection_processed >= rpkt_quanta_old || yield) {
+			curr_connection_processed = 0;
+			frr_with_mutex (&bm->peer_connection_mtx) {
+				if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
+					peer_connection_fifo_add_tail(&bm->connection_fifo,
+								      connection);
+				if (!yield)
+					connection = peer_connection_fifo_pop(&bm->connection_fifo);
+				else
+					connection = NULL;
+			}
+			if (connection)
+				peer = connection->peer;
+
+			continue;
+		}
+
+		frr_with_mutex (&connection->io_mtx) {
+			if (connection->ibuf->count > 0)
+				more_work = true;
+			else
+				more_work = false;
+		}
+
+		if (!more_work) {
+			frr_with_mutex (&bm->peer_connection_mtx)
+				connection = peer_connection_fifo_pop(&bm->connection_fifo);
+
+			if (connection)
+				peer = connection->peer;
+		}
 	}
 
-	if (fsm_update_result != FSM_PEER_TRANSFERRED
-	    && fsm_update_result != FSM_PEER_STOPPED) {
+	if (connection) {
 		frr_with_mutex (&connection->io_mtx) {
-			// more work to do, come back later
 			if (connection->ibuf->count > 0)
-				event_add_event(bm->master, bgp_process_packet,
-						connection, 0,
-						&connection->t_process_packet);
+				more_work = true;
+			else
+				more_work = false;
+		}
+		frr_with_mutex (&bm->peer_connection_mtx) {
+			if (more_work &&
+			    !peer_connection_fifo_member(&bm->connection_fifo, connection))
+				peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
 		}
 	}
+
+done:
+	frr_with_mutex (&bm->peer_connection_mtx)
+		count = peer_connection_fifo_count(&bm->connection_fifo);
+
+	if (count)
+		event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
 }
 
 /* Send EOR when routes are processed by selection deferral timer */
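The event_should_yield() call in the loop above is FRR's standard cooperative-yield check: it reports whether the callback has used up its CPU time slot, so a single expensive packet (such as a large withdraw) can end the run early even though the packet budget is not exhausted. The standalone toy below reproduces that control flow with a plain monotonic-clock deadline; should_yield(), SLOT_USEC, and the spin-loop workload are stand-ins, not the FRR implementation.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define SLOT_USEC 10000 /* assume a 10 ms slot for illustration */

static struct timespec run_start;

static long elapsed_usec(void)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - run_start.tv_sec) * 1000000L +
	       (now.tv_nsec - run_start.tv_nsec) / 1000L;
}

/* Stand-in for event_should_yield(): true once the slot is used up. */
static bool should_yield(void)
{
	return elapsed_usec() > SLOT_USEC;
}

int main(void)
{
	int packets = 0;

	clock_gettime(CLOCK_MONOTONIC, &run_start);
	/* Process "packets" until either the batch limit or the time
	 * slot is hit, whichever comes first.  Heavy per-packet work
	 * trips the yield after a packet or two; cheap packets run up
	 * to the batch limit. */
	while (packets < 100 && !should_yield()) {
		for (volatile long spin = 0; spin < 2000000; spin++)
			; /* fake per-packet work */
		packets++;
	}
	printf("processed %d packets before yielding\n", packets);
	return 0;
}

This matches the commit message's observation: on a fast machine, updates get 2-3 packets per slot and a large withdraw only 1 before other work is allowed to run.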

bgpd/bgpd.c

Lines changed: 4 additions & 0 deletions
@@ -8683,6 +8683,10 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
 
 	bm = &bgp_master;
 
+	/* Initialize the peer connection FIFO list */
+	peer_connection_fifo_init(&bm->connection_fifo);
+	pthread_mutex_init(&bm->peer_connection_mtx, NULL);
+
 	zebra_announce_init(&bm->zebra_announce_head);
 	zebra_l2_vni_init(&bm->zebra_l2_vni_head);
 	zebra_l3_vni_init(&bm->zebra_l3_vni_head);

bgpd/bgpd.h

Lines changed: 15 additions & 1 deletion
@@ -107,6 +107,9 @@ enum bgp_af_index {
 extern struct frr_pthread *bgp_pth_io;
 extern struct frr_pthread *bgp_pth_ka;
 
+/* FIFO list for peer connections */
+PREDECL_LIST(peer_connection_fifo);
+
 /* BGP master for system wide configurations and variables. */
 struct bgp_master {
 	/* BGP instance list. */
@@ -121,6 +124,11 @@ struct bgp_master {
 	/* BGP port number. */
 	uint16_t port;
 
+	/* FIFO list head for peer connections */
+	struct peer_connection_fifo_head connection_fifo;
+	struct event *e_process_packet;
+	pthread_mutex_t peer_connection_mtx;
+
 	/* Listener addresses */
 	struct list *addresses;
@@ -1378,7 +1386,6 @@ struct peer_connection {
 	struct event *t_pmax_restart;
 
 	struct event *t_routeadv;
-	struct event *t_process_packet;
 
 	struct event *t_stop_with_notify;
@@ -1394,7 +1401,14 @@ struct peer_connection {
 
 	union sockunion *su_local;  /* Sockunion of local address. */
 	union sockunion *su_remote; /* Sockunion of remote address. */
+
+	/* For FIFO list */
+	struct peer_connection_fifo_item fifo_item;
 };
+
+/* Declare the FIFO list implementation */
+DECLARE_LIST(peer_connection_fifo, struct peer_connection, fifo_item);
+
 const char *bgp_peer_get_connection_direction(struct peer_connection *connection);
 extern struct peer_connection *bgp_peer_connection_new(struct peer *peer);
 extern void bgp_peer_connection_free(struct peer_connection **connection);
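The FIFO itself comes from FRR's typesafe list macros: PREDECL_LIST() forward-declares the type for use in headers, DECLARE_LIST() generates the typed accessors, and the embedded *_item struct provides the intrusive linkage. Below is a minimal sketch of the same pattern on a toy struct, using only the generated calls this commit relies on (init/add_tail/member/pop/count/fini); it assumes FRR's lib/typesafe.h and therefore builds only inside the FRR source tree.

#include <stdio.h>
#include "lib/typesafe.h"

PREDECL_LIST(job_fifo);

struct job {
	int id;
	struct job_fifo_item fifo_item; /* intrusive linkage, like peer_connection */
};

DECLARE_LIST(job_fifo, struct job, fifo_item);

static struct job_fifo_head jobs;

int main(void)
{
	struct job a = { .id = 1 }, b = { .id = 2 };
	struct job *j;

	job_fifo_init(&jobs);
	job_fifo_add_tail(&jobs, &a);
	if (!job_fifo_member(&jobs, &b)) /* the idempotent-enqueue check */
		job_fifo_add_tail(&jobs, &b);

	while ((j = job_fifo_pop(&jobs)))
		printf("job %d (remaining %zu)\n", j->id, job_fifo_count(&jobs));

	job_fifo_fini(&jobs);
	return 0;
}

The intrusive fifo_item member is why a connection can be on the FIFO at most once, and why membership can be tested cheaply before add_tail throughout the commit.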
