Skip to content

Commit 829dcde

Browse files
authored
Apply Zebra fpm backpressure patches to dplane_fpm_sonic (#21356)
<!-- Please make sure you've read and understood our contributing guidelines: https://github.com/Azure/SONiC/blob/gh-pages/CONTRIBUTING.md failure_prs.log skip_prs.log Make sure all your commits include a signature generated with `git commit -s` ** If this is a bug fix, make sure your description includes "fixes #xxxx", or "closes #xxxx" or "resolves #xxxx" Please provide the following information: --> #### Why I did it Reduce high CPU usage in zebra after toggling the ports on all interfaces simultaneously. #### How I did it Apply zebra fpm backpressure patches from FRR mainline to dplane_fpm_sonic: * zebra: Use built in data structure counter (FRRouting/frr#16221) * Zebra fpm backpressure (FRRouting/frr#16220) <!-- #### How to verify it If PR needs to be backported, then the PR must be tested against the base branch and the earliest backport release branch and provide tested image version on these two branches. For example, if the PR is requested for master, 202211 and 202012, then the requester needs to provide test results on master and 202012. --> <!-- #### Which release branch to backport (provide reason below if selected) - Note we only backport fixes to a release branch, *not* features! - Please also provide a reason for the backporting below. - e.g. - [x] 202006 - [ ] 201811 - [ ] 201911 - [ ] 202006 - [ ] 202012 - [ ] 202106 - [ ] 202111 - [ ] 202205 - [ ] 202211 - [ ] 202305 --> <!-- #### Tested branch (Please provide the tested image version) - Please provide tested image version - e.g. - [x] 20201231.100 - [ ] - [ ] --> <!-- #### Description for the changelog Write a short (one line) summary that describes the changes in this pull request for inclusion in the changelog: --> <!-- Ensure to add label/tag for the feature raised. example - PR#2174 under sonic-utilities repo. where, Generic Config and Update feature has been labelled as GCU. 
--> <!-- #### Link to config_db schema for YANG module changes Provide a link to config_db schema for the table for which YANG model is defined Link should point to correct section on https://github.com/Azure/sonic-buildimage/blob/master/src/sonic-yang-models/doc/Configuration.md --> <!-- #### A picture of a cute animal (not mandatory but encouraged) -->
1 parent 2e5e624 commit 829dcde

File tree

1 file changed

+38
-23
lines changed

1 file changed

+38
-23
lines changed

src/sonic-frr/dplane_fpm_sonic/dplane_fpm_sonic.c

+38-23
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,6 @@ struct fpm_nl_ctx {
204204

205205
/* Amount of data plane contexts processed. */
206206
_Atomic uint32_t dplane_contexts;
207-
/* Amount of data plane contexts enqueued. */
208-
_Atomic uint32_t ctxqueue_len;
209207
/* Peak amount of data plane contexts enqueued. */
210208
_Atomic uint32_t ctxqueue_len_peak;
211209

@@ -380,6 +378,12 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd,
380378
FPM_STR
381379
"FPM statistic counters\n")
382380
{
381+
uint32_t curr_queue_len;
382+
383+
frr_with_mutex (&gfnc->ctxqueue_mutex) {
384+
curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
385+
}
386+
383387
vty_out(vty, "%30s\n%30s\n", "FPM counters", "============");
384388

385389
#define SHOW_COUNTER(label, counter) \
@@ -393,8 +397,7 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd,
393397
SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors);
394398
SHOW_COUNTER("Data plane items processed",
395399
gfnc->counters.dplane_contexts);
396-
SHOW_COUNTER("Data plane items enqueued",
397-
gfnc->counters.ctxqueue_len);
400+
SHOW_COUNTER("Data plane items enqueued", curr_queue_len);
398401
SHOW_COUNTER("Data plane items queue peak",
399402
gfnc->counters.ctxqueue_len_peak);
400403
SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full);
@@ -413,6 +416,12 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd,
413416
"FPM statistic counters\n"
414417
JSON_STR)
415418
{
419+
uint32_t curr_queue_len;
420+
421+
frr_with_mutex (&gfnc->ctxqueue_mutex) {
422+
curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
423+
}
424+
416425
struct json_object *jo;
417426

418427
jo = json_object_new_object();
@@ -426,8 +435,7 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd,
426435
gfnc->counters.connection_errors);
427436
json_object_int_add(jo, "data-plane-contexts",
428437
gfnc->counters.dplane_contexts);
429-
json_object_int_add(jo, "data-plane-contexts-queue",
430-
gfnc->counters.ctxqueue_len);
438+
json_object_int_add(jo, "data-plane-contexts-queue", curr_queue_len);
431439
json_object_int_add(jo, "data-plane-contexts-queue-peak",
432440
gfnc->counters.ctxqueue_len_peak);
433441
json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full);
@@ -1313,7 +1321,7 @@ static ssize_t netlink_srv6_vpn_route_msg_encode(int cmd,
13131321
&encap_src_addr, IPV6_MAX_BYTELEN))
13141322
return false;
13151323
if (!nl_attr_put(&req->n, datalen, FPM_ROUTE_ENCAP_SRV6_VPN_SID,
1316-
&nexthop->nh_srv6->seg6_segs,
1324+
&nexthop->nh_srv6->seg6_segs->seg[0],
13171325
IPV6_MAX_BYTELEN))
13181326
return false;
13191327
nl_attr_nest_end(&req->n, nest);
@@ -1992,8 +2000,6 @@ static void fpm_process_queue(struct event *t)
19922000

19932001
/* Account the processed entries. */
19942002
processed_contexts++;
1995-
atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1,
1996-
memory_order_relaxed);
19972003

19982004
dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
19992005
dplane_provider_enqueue_out_ctx(fnc->prov, ctx);
@@ -2162,10 +2168,29 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
21622168
struct zebra_dplane_ctx *ctx;
21632169
struct fpm_nl_ctx *fnc;
21642170
int counter, limit;
2165-
uint64_t cur_queue, peak_queue = 0, stored_peak_queue;
2171+
uint64_t cur_queue = 0, peak_queue = 0, stored_peak_queue;
21662172

21672173
fnc = dplane_provider_get_data(prov);
21682174
limit = dplane_provider_get_work_limit(prov);
2175+
2176+
frr_with_mutex (&fnc->ctxqueue_mutex) {
2177+
cur_queue = dplane_ctx_queue_count(&fnc->ctxqueue);
2178+
}
2179+
2180+
if (cur_queue >= (uint64_t)limit) {
2181+
if (IS_ZEBRA_DEBUG_FPM)
2182+
zlog_debug("%s: Already at a limit(%" PRIu64
2183+
") of internal work, hold off",
2184+
__func__, cur_queue);
2185+
limit = 0;
2186+
} else {
2187+
if (IS_ZEBRA_DEBUG_FPM)
2188+
zlog_debug("%s: current queue is %" PRIu64
2189+
", limiting to lesser amount of %" PRIu64,
2190+
__func__, cur_queue, limit - cur_queue);
2191+
limit -= cur_queue;
2192+
}
2193+
21692194
for (counter = 0; counter < limit; counter++) {
21702195
ctx = dplane_provider_dequeue_in_ctx(prov);
21712196
if (ctx == NULL)
@@ -2176,20 +2201,12 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
21762201
* anyway.
21772202
*/
21782203
if (fnc->socket != -1 && fnc->connecting == false) {
2179-
/*
2180-
* Update the number of queued contexts *before*
2181-
* enqueueing, to ensure counter consistency.
2182-
*/
2183-
atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len,
2184-
1, memory_order_relaxed);
2185-
21862204
frr_with_mutex (&fnc->ctxqueue_mutex) {
21872205
dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx);
2206+
cur_queue =
2207+
dplane_ctx_queue_count(&fnc->ctxqueue);
21882208
}
21892209

2190-
cur_queue = atomic_load_explicit(
2191-
&fnc->counters.ctxqueue_len,
2192-
memory_order_relaxed);
21932210
if (peak_queue < cur_queue)
21942211
peak_queue = cur_queue;
21952212
continue;
@@ -2206,9 +2223,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
22062223
atomic_store_explicit(&fnc->counters.ctxqueue_len_peak,
22072224
peak_queue, memory_order_relaxed);
22082225

2209-
if (atomic_load_explicit(&fnc->counters.ctxqueue_len,
2210-
memory_order_relaxed)
2211-
> 0)
2226+
if (cur_queue > 0)
22122227
event_add_timer(fnc->fthread->master, fpm_process_queue,
22132228
fnc, 0, &fnc->t_dequeue);
22142229

0 commit comments

Comments
 (0)