Skip to content

Commit c312917

Browse files
authored
Merge pull request FRRouting#18450 from donaldsharp/bgp_packet_reads
Bgp packet reads conversion to a FIFO
2 parents f33dcf3 + 83a92c9 commit c312917

19 files changed

+209
-58
lines changed

bgpd/bgp_fsm.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,11 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
184184
EVENT_OFF(keeper->t_delayopen);
185185
EVENT_OFF(keeper->t_connect_check_r);
186186
EVENT_OFF(keeper->t_connect_check_w);
187-
EVENT_OFF(keeper->t_process_packet);
187+
188+
frr_with_mutex (&bm->peer_connection_mtx) {
189+
if (peer_connection_fifo_member(&bm->connection_fifo, keeper))
190+
peer_connection_fifo_del(&bm->connection_fifo, keeper);
191+
}
188192

189193
/*
190194
* At this point in time, it is possible that there are packets pending
@@ -305,8 +309,13 @@ static struct peer *peer_xfer_conn(struct peer *from_peer)
305309

306310
bgp_reads_on(keeper);
307311
bgp_writes_on(keeper);
308-
event_add_event(bm->master, bgp_process_packet, keeper, 0,
309-
&keeper->t_process_packet);
312+
313+
frr_with_mutex (&bm->peer_connection_mtx) {
314+
if (!peer_connection_fifo_member(&bm->connection_fifo, keeper)) {
315+
peer_connection_fifo_add_tail(&bm->connection_fifo, keeper);
316+
}
317+
}
318+
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
310319

311320
return (peer);
312321
}

bgpd/bgp_io.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,11 @@ void bgp_reads_off(struct peer_connection *connection)
9999
assert(fpt->running);
100100

101101
event_cancel_async(fpt->master, &connection->t_read, NULL);
102-
EVENT_OFF(connection->t_process_packet);
102+
103+
frr_with_mutex (&bm->peer_connection_mtx) {
104+
if (peer_connection_fifo_member(&bm->connection_fifo, connection))
105+
peer_connection_fifo_del(&bm->connection_fifo, connection);
106+
}
103107

104108
UNSET_FLAG(connection->thread_flags, PEER_THREAD_READS_ON);
105109
}
@@ -292,9 +296,13 @@ static void bgp_process_reads(struct event *thread)
292296

293297
event_add_read(fpt->master, bgp_process_reads, connection,
294298
connection->fd, &connection->t_read);
295-
if (added_pkt)
296-
event_add_event(bm->master, bgp_process_packet, connection, 0,
297-
&connection->t_process_packet);
299+
if (added_pkt) {
300+
frr_with_mutex (&bm->peer_connection_mtx) {
301+
if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
302+
peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
303+
}
304+
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
305+
}
298306
}
299307

300308
/*

bgpd/bgp_io.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#define BGP_WRITE_PACKET_MAX 64U
1212
#define BGP_READ_PACKET_MAX 10U
13+
#define BGP_PACKET_PROCESS_LIMIT 100
1314

1415
#include "bgpd/bgpd.h"
1516
#include "frr_pthread.h"

bgpd/bgp_main.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,14 @@ __attribute__((__noreturn__)) void sigint(void)
161161

162162
bgp_exit(0);
163163

164+
/*
165+
* This is being done after bgp_exit because items may be removed
166+
* from the connection_fifo
167+
*/
168+
peer_connection_fifo_fini(&bm->connection_fifo);
169+
EVENT_OFF(bm->e_process_packet);
170+
pthread_mutex_destroy(&bm->peer_connection_mtx);
171+
164172
exit(0);
165173
}
166174

bgpd/bgp_packet.c

Lines changed: 106 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3974,6 +3974,18 @@ int bgp_capability_receive(struct peer_connection *connection,
39743974
* would not, making event flow difficult to understand. Please think twice
39753975
* before hacking this.
39763976
*
3977+
* packet_processing is now a FIFO of connections that need to be handled
3978+
* This loop has a maximum run of 100(BGP_PACKET_PROCESS_LIMIT) packets,
3979+
* but each individual connection can only handle the quanta value as
3980+
* specified in bgp_vty.c. If the connection still has work to do, place it
3981+
* back on the back of the queue for more work. Do note that event_should_yield
3982+
* is also being called to figure out if processing should stop and work
3983+
* picked up after other items can run. This was added *After* withdrawals
3984+
* started being processed at scale and this function was taking cpu for 40+ seconds
3985+
* On my machine we are getting 2-3 packets before a yield should happen in the
3986+
* update case. Withdrawal is 1 packet being processed(note this is a very very
3987+
* fast computer) before other items should be run.
3988+
*
39773989
* Thread type: EVENT_EVENT
39783990
* @param thread
39793991
* @return 0
@@ -3986,30 +3998,54 @@ void bgp_process_packet(struct event *thread)
39863998
uint32_t rpkt_quanta_old; // how many packets to read
39873999
int fsm_update_result; // return code of bgp_event_update()
39884000
int mprc; // message processing return code
4001+
uint32_t processed = 0, curr_connection_processed = 0;
4002+
bool more_work = false;
4003+
size_t count;
4004+
uint32_t total_packets_to_process, total_processed = 0;
4005+
4006+
frr_with_mutex (&bm->peer_connection_mtx)
4007+
connection = peer_connection_fifo_pop(&bm->connection_fifo);
4008+
4009+
if (!connection)
4010+
goto done;
39894011

3990-
connection = EVENT_ARG(thread);
4012+
total_packets_to_process = BGP_PACKET_PROCESS_LIMIT;
39914013
peer = connection->peer;
39924014
rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
39934015
memory_order_relaxed);
4016+
39944017
fsm_update_result = 0;
39954018

3996-
/* Guard against scheduled events that occur after peer deletion. */
3997-
if (connection->status == Deleted || connection->status == Clearing)
3998-
return;
4019+
while ((processed < total_packets_to_process) && connection) {
4020+
total_processed++;
4021+
/* Guard against scheduled events that occur after peer deletion. */
4022+
if (connection->status == Deleted || connection->status == Clearing) {
4023+
frr_with_mutex (&bm->peer_connection_mtx)
4024+
connection = peer_connection_fifo_pop(&bm->connection_fifo);
39994025

4000-
unsigned int processed = 0;
4026+
if (connection)
4027+
peer = connection->peer;
4028+
4029+
continue;
4030+
}
40014031

4002-
while (processed < rpkt_quanta_old) {
40034032
uint8_t type = 0;
40044033
bgp_size_t size;
40054034
char notify_data_length[2];
40064035

4007-
frr_with_mutex (&connection->io_mtx) {
4036+
frr_with_mutex (&connection->io_mtx)
40084037
peer->curr = stream_fifo_pop(connection->ibuf);
4009-
}
40104038

4011-
if (peer->curr == NULL) // no packets to process, hmm...
4012-
return;
4039+
if (peer->curr == NULL) {
4040+
frr_with_mutex (&bm->peer_connection_mtx)
4041+
connection = peer_connection_fifo_pop(&bm->connection_fifo);
4042+
4043+
4044+
if (connection)
4045+
peer = connection->peer;
4046+
4047+
continue;
4048+
}
40134049

40144050
/* skip the marker and copy the packet length */
40154051
stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
@@ -4113,32 +4149,81 @@ void bgp_process_packet(struct event *thread)
41134149
stream_free(peer->curr);
41144150
peer->curr = NULL;
41154151
processed++;
4152+
curr_connection_processed++;
41164153

41174154
/* Update FSM */
41184155
if (mprc != BGP_PACKET_NOOP)
41194156
fsm_update_result = bgp_event_update(connection, mprc);
4120-
else
4121-
continue;
41224157

41234158
/*
41244159
* If peer was deleted, do not process any more packets. This
41254160
* is usually due to executing BGP_Stop or a stub deletion.
41264161
*/
4127-
if (fsm_update_result == FSM_PEER_TRANSFERRED
4128-
|| fsm_update_result == FSM_PEER_STOPPED)
4129-
break;
4162+
if (fsm_update_result == FSM_PEER_TRANSFERRED ||
4163+
fsm_update_result == FSM_PEER_STOPPED) {
4164+
frr_with_mutex (&bm->peer_connection_mtx)
4165+
connection = peer_connection_fifo_pop(&bm->connection_fifo);
4166+
4167+
if (connection)
4168+
peer = connection->peer;
4169+
4170+
continue;
4171+
}
4172+
4173+
bool yield = event_should_yield(thread);
4174+
if (curr_connection_processed >= rpkt_quanta_old || yield) {
4175+
curr_connection_processed = 0;
4176+
frr_with_mutex (&bm->peer_connection_mtx) {
4177+
if (!peer_connection_fifo_member(&bm->connection_fifo, connection))
4178+
peer_connection_fifo_add_tail(&bm->connection_fifo,
4179+
connection);
4180+
if (!yield)
4181+
connection = peer_connection_fifo_pop(&bm->connection_fifo);
4182+
else
4183+
connection = NULL;
4184+
}
4185+
if (connection)
4186+
peer = connection->peer;
4187+
4188+
continue;
4189+
}
4190+
4191+
frr_with_mutex (&connection->io_mtx) {
4192+
if (connection->ibuf->count > 0)
4193+
more_work = true;
4194+
else
4195+
more_work = false;
4196+
}
4197+
4198+
if (!more_work) {
4199+
frr_with_mutex (&bm->peer_connection_mtx)
4200+
connection = peer_connection_fifo_pop(&bm->connection_fifo);
4201+
4202+
if (connection)
4203+
peer = connection->peer;
4204+
}
41304205
}
41314206

4132-
if (fsm_update_result != FSM_PEER_TRANSFERRED
4133-
&& fsm_update_result != FSM_PEER_STOPPED) {
4207+
if (connection) {
41344208
frr_with_mutex (&connection->io_mtx) {
4135-
// more work to do, come back later
41364209
if (connection->ibuf->count > 0)
4137-
event_add_event(bm->master, bgp_process_packet,
4138-
connection, 0,
4139-
&connection->t_process_packet);
4210+
more_work = true;
4211+
else
4212+
more_work = false;
4213+
}
4214+
frr_with_mutex (&bm->peer_connection_mtx) {
4215+
if (more_work &&
4216+
!peer_connection_fifo_member(&bm->connection_fifo, connection))
4217+
peer_connection_fifo_add_tail(&bm->connection_fifo, connection);
41404218
}
41414219
}
4220+
4221+
done:
4222+
frr_with_mutex (&bm->peer_connection_mtx)
4223+
count = peer_connection_fifo_count(&bm->connection_fifo);
4224+
4225+
if (count)
4226+
event_add_event(bm->master, bgp_process_packet, NULL, 0, &bm->e_process_packet);
41424227
}
41434228

41444229
/* Send EOR when routes are processed by selection deferral timer */

bgpd/bgp_route.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4201,12 +4201,30 @@ static wq_item_status meta_queue_process(struct work_queue *dummy, void *data)
42014201
{
42024202
struct meta_queue *mq = data;
42034203
uint32_t i;
4204+
uint32_t peers_on_fifo;
4205+
static uint32_t total_runs = 0;
4206+
4207+
total_runs++;
4208+
4209+
frr_with_mutex (&bm->peer_connection_mtx)
4210+
peers_on_fifo = peer_connection_fifo_count(&bm->connection_fifo);
4211+
4212+
/*
4213+
* If the number of peers on the fifo is greater than 10
4214+
* let's yield this run of the MetaQ to allow the packet processing to make
4215+
* progress against the incoming packets. But we should also
4216+
* attempt to allow this to run occassionally. Let's run
4217+
* something every 10 attempts to process the work queue.
4218+
*/
4219+
if (peers_on_fifo > 10 && total_runs % 10 != 0)
4220+
return WQ_QUEUE_BLOCKED;
42044221

42054222
for (i = 0; i < MQ_SIZE; i++)
42064223
if (process_subq(mq->subq[i], i)) {
42074224
mq->size--;
42084225
break;
42094226
}
4227+
42104228
return mq->size ? WQ_REQUEUE : WQ_SUCCESS;
42114229
}
42124230

bgpd/bgpd.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8683,6 +8683,10 @@ void bgp_master_init(struct event_loop *master, const int buffer_size,
86838683

86848684
bm = &bgp_master;
86858685

8686+
/* Initialize the peer connection FIFO list */
8687+
peer_connection_fifo_init(&bm->connection_fifo);
8688+
pthread_mutex_init(&bm->peer_connection_mtx, NULL);
8689+
86868690
zebra_announce_init(&bm->zebra_announce_head);
86878691
zebra_l2_vni_init(&bm->zebra_l2_vni_head);
86888692
zebra_l3_vni_init(&bm->zebra_l3_vni_head);

bgpd/bgpd.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ enum bgp_af_index {
107107
extern struct frr_pthread *bgp_pth_io;
108108
extern struct frr_pthread *bgp_pth_ka;
109109

110+
/* FIFO list for peer connections */
111+
PREDECL_LIST(peer_connection_fifo);
112+
110113
/* BGP master for system wide configurations and variables. */
111114
struct bgp_master {
112115
/* BGP instance list. */
@@ -121,6 +124,11 @@ struct bgp_master {
121124
/* BGP port number. */
122125
uint16_t port;
123126

127+
/* FIFO list head for peer connections */
128+
struct peer_connection_fifo_head connection_fifo;
129+
struct event *e_process_packet;
130+
pthread_mutex_t peer_connection_mtx;
131+
124132
/* Listener addresses */
125133
struct list *addresses;
126134

@@ -1378,7 +1386,6 @@ struct peer_connection {
13781386
struct event *t_pmax_restart;
13791387

13801388
struct event *t_routeadv;
1381-
struct event *t_process_packet;
13821389

13831390
struct event *t_stop_with_notify;
13841391

@@ -1394,7 +1401,14 @@ struct peer_connection {
13941401

13951402
union sockunion *su_local; /* Sockunion of local address. */
13961403
union sockunion *su_remote; /* Sockunion of remote address. */
1404+
1405+
/* For FIFO list */
1406+
struct peer_connection_fifo_item fifo_item;
13971407
};
1408+
1409+
/* Declare the FIFO list implementation */
1410+
DECLARE_LIST(peer_connection_fifo, struct peer_connection, fifo_item);
1411+
13981412
const char *bgp_peer_get_connection_direction(struct peer_connection *connection);
13991413
extern struct peer_connection *bgp_peer_connection_new(struct peer *peer);
14001414
extern void bgp_peer_connection_free(struct peer_connection **connection);

tests/topotests/high_ecmp/r1/frr.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,3 +2577,8 @@ interface r1-eth514
25772577
ipv6 address 2001:db8:3:5::1/64
25782578
no shut
25792579
!
2580+
router bgp 1001
2581+
timers bgp 5 60
2582+
no bgp ebgp-requires-policy
2583+
read-quanta 1
2584+
!

tests/topotests/high_ecmp/r1/frr_ipv4_bgp.conf

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
router bgp 1001
2-
timers bgp 5 20
3-
no bgp ebgp-requires-policy
4-
read-quanta 1
52
neighbor 10.1.1.2 remote-as external
63
neighbor 10.1.2.2 remote-as external
74
neighbor 10.1.3.2 remote-as external

tests/topotests/high_ecmp/r1/frr_ipv6_bgp.conf

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
router bgp 1001
2-
timers bgp 5 20
3-
no bgp ebgp-requires-policy
4-
read-quanta 1
52
neighbor 2001:db8:1:1::2 remote-as external
63
neighbor 2001:db8:1:2::2 remote-as external
74
neighbor 2001:db8:1:3::2 remote-as external

tests/topotests/high_ecmp/r1/frr_unnumbered_bgp.conf

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
router bgp 1001
2-
timers bgp 5 20
3-
no bgp ebgp-requires-policy
4-
read-quanta 1
52
neighbor r1-eth0 interface remote-as external
63
neighbor r1-eth1 interface remote-as external
74
neighbor r1-eth2 interface remote-as external

tests/topotests/high_ecmp/r2/frr.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,3 +2577,8 @@ interface r2-eth514
25772577
ipv6 address 2001:db8:3:5::2/64
25782578
no shutdown
25792579
!
2580+
router bgp 1002
2581+
timers bgp 5 60
2582+
no bgp ebgp-requires-policy
2583+
read-quanta 1
2584+
!

tests/topotests/high_ecmp/r2/frr_ipv4_bgp.conf

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
router bgp 1002
2-
timers bgp 5 20
3-
no bgp ebgp-requires-policy
4-
read-quanta 1
52
neighbor 10.1.1.1 remote-as external
63
neighbor 10.1.2.1 remote-as external
74
neighbor 10.1.3.1 remote-as external

0 commit comments

Comments
 (0)