Commit 642b081

Merge pull request #1 from FRRouting/master

Merging from upstream tree

2 parents dcc1615 + 595ad74

26 files changed: +1313 -139 lines

bgpd/bgp_attr.c

+2 -1

@@ -2956,7 +2956,8 @@ void bgp_packet_mpattr_prefix(struct stream *s, afi_t afi, safi_t safi,
                                       addpath_encode, addpath_tx_id);
         } else if (safi == SAFI_LABELED_UNICAST) {
                 /* Prefix write with label. */
-                stream_put_labeled_prefix(s, p, label);
+                stream_put_labeled_prefix(s, p, label, addpath_encode,
+                                          addpath_tx_id);
         } else if (safi == SAFI_FLOWSPEC) {
                 if (PSIZE (p->prefixlen)+2 < FLOWSPEC_NLRI_SIZELIMIT)
                         stream_putc(s, PSIZE (p->prefixlen)+2);

bgpd/bgp_lcommunity.c

+4 -2

@@ -439,7 +439,8 @@ struct lcommunity *lcommunity_str2com(const char *str)
         enum lcommunity_token token = lcommunity_token_unknown;
         struct lcommunity_val lval;
 
-        while ((str = lcommunity_gettoken(str, &lval, &token))) {
+        do {
+                str = lcommunity_gettoken(str, &lval, &token);
                 switch (token) {
                 case lcommunity_token_val:
                         if (lcom == NULL)
@@ -452,7 +453,8 @@ struct lcommunity *lcommunity_str2com(const char *str)
                         lcommunity_free(&lcom);
                         return NULL;
                 }
-        }
+        } while (str);
+
         return lcom;
 }

bgpd/bgp_route.c

+7

@@ -5332,6 +5332,13 @@ static void bgp_purge_af_static_redist_routes(struct bgp *bgp, afi_t afi,
         struct bgp_node *rn;
         struct bgp_path_info *pi;
 
+        /* Do not install the aggregate route if BGP is in the
+         * process of termination.
+         */
+        if (bgp_flag_check(bgp, BGP_FLAG_DELETE_IN_PROGRESS) ||
+            (bgp->peer_self == NULL))
+                return;
+
         table = bgp->rib[afi][safi];
         for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
                 for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next) {

bgpd/bgp_routemap.c

+26 -23

@@ -3406,31 +3406,34 @@ int bgp_route_map_update_timer(struct thread *thread)
 
 static void bgp_route_map_mark_update(const char *rmap_name)
 {
-        if (bm->t_rmap_update == NULL) {
-                struct listnode *node, *nnode;
-                struct bgp *bgp;
-
-                /* rmap_update_timer of 0 means don't do route updates */
-                if (bm->rmap_update_timer) {
-                        bm->t_rmap_update = NULL;
-                        thread_add_timer(bm->master, bgp_route_map_update_timer,
-                                         NULL, bm->rmap_update_timer,
-                                         &bm->t_rmap_update);
-
-                        /* Signal the groups that a route-map update event has
-                         * started */
-                        for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
-                                update_group_policy_update(bgp,
-                                                           BGP_POLICY_ROUTE_MAP,
-                                                           rmap_name, 1, 1);
-                } else {
-                        for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
-                                bgp_route_map_process_update(bgp, rmap_name, 0);
+        struct listnode *node, *nnode;
+        struct bgp *bgp;
+
+        /* If new update is received before the current timer timed out,
+         * turn it off and start a new timer.
+         */
+        if (bm->t_rmap_update != NULL)
+                THREAD_OFF(bm->t_rmap_update);
+
+        /* rmap_update_timer of 0 means don't do route updates */
+        if (bm->rmap_update_timer) {
+                thread_add_timer(bm->master, bgp_route_map_update_timer,
+                                 NULL, bm->rmap_update_timer,
+                                 &bm->t_rmap_update);
+
+                /* Signal the groups that a route-map update event has
+                 * started */
+                for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
+                        update_group_policy_update(bgp,
+                                                   BGP_POLICY_ROUTE_MAP,
+                                                   rmap_name, 1, 1);
+        } else {
+                for (ALL_LIST_ELEMENTS(bm->bgp, node, nnode, bgp))
+                        bgp_route_map_process_update(bgp, rmap_name, 0);
 #if ENABLE_BGP_VNC
-                        zlog_debug("%s: calling vnc_routemap_update", __func__);
-                        vnc_routemap_update(bgp, __func__);
+                zlog_debug("%s: calling vnc_routemap_update", __func__);
+                vnc_routemap_update(bgp, __func__);
 #endif
-                }
         }
 }

doc/developer/library.rst

+1
@@ -8,6 +8,7 @@ Library Facilities (libfrr)
    :maxdepth: 2
 
    memtypes
+   rcu
    lists
    logging
    hooks

doc/developer/rcu.rst

+269
@@ -0,0 +1,269 @@
.. highlight:: c

RCU
===

Introduction
------------

RCU (Read-Copy-Update) is, fundamentally, a paradigm of multithreaded
operation (and not a set of APIs.) The core ideas are:

* longer, complicated updates to structures are made only on private,
  "invisible" copies. Other threads, when they access the structure, see an
  older (but consistent) copy.

* once done, the updated copy is swapped in in a single operation so that
  other threads see either the old or the new data but no inconsistent state
  between.

* the old instance is only released after making sure that it is impossible
  any other thread might still be reading it.

For more information, please search for general or Linux kernel RCU
documentation; there is no way this doc can be comprehensive in explaining the
interactions:

* https://en.wikipedia.org/wiki/Read-copy-update
* https://www.kernel.org/doc/html/latest/kernel-hacking/locking.html#avoiding-locks-read-copy-update
* https://lwn.net/Articles/262464/
* http://www.rdrop.com/users/paulmck/RCU/rclock_OLS.2001.05.01c.pdf
* http://lse.sourceforge.net/locking/rcupdate.html

RCU, the TL;DR
^^^^^^^^^^^^^^

#. data structures are always consistent for reading. That's the "R" part.
#. reading never blocks / takes a lock.
#. rcu_read_lock is not a lock in the traditional sense. Think of it as a
   "reservation"; it notes what the *oldest* possible thing the thread might
   be seeing is, and which thus can't be deleted yet.
#. you create some object, finish it up, and then publish it.
#. publishing is an ``atomic_*`` call with ``memory_order_release``, which
   tells the compiler to make sure prior memory writes have completed before
   doing the atomic op.
#. ``ATOMLIST_*`` ``add`` operations do the ``memory_order_release`` for you.
#. you can't touch the object after it is published, except with atomic ops.
#. because you can't touch it, if you want to change it you make a new copy,
   work on that, and then publish the new copy. That's the "CU" part.
#. deleting the object is also an atomic op.
#. other threads that started working before you published / deleted an object
   might not see the new object / still see the deleted object.
#. because other threads may still see deleted objects, the ``free()`` needs
   to be delayed. That's what :c:func:`rcu_free()` is for.
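
Putting the list above together, here is a minimal sketch of the full
lifecycle, assuming C11 atomics and a hypothetical ``struct config`` with a
hypothetical ``MTYPE_CONFIG`` allocation type (these names are illustrative,
not part of the FRR API)::

   struct config {
           uint32_t refresh_interval;
           struct rcu_head rcu;    /* queueing for the delayed free() */
   };

   static struct config *_Atomic global_config;

   /* writer: create & fill a new copy, publish it, RCU-free the old one */
   void config_set_interval(uint32_t interval)
   {
           struct config *newcfg, *oldcfg;

           newcfg = XCALLOC(MTYPE_CONFIG, sizeof(*newcfg));
           newcfg->refresh_interval = interval;

           /* memory_order_release ensures the writes above are visible
            * before the new pointer can be observed by readers */
           oldcfg = atomic_exchange_explicit(&global_config, newcfg,
                                             memory_order_release);
           if (oldcfg)
                   rcu_free(MTYPE_CONFIG, oldcfg, rcu);
           /* oldcfg must not be touched after this point */
   }
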
When (not) to use RCU
^^^^^^^^^^^^^^^^^^^^^

RCU is designed for read-heavy workloads where objects are updated relatively
rarely, but frequently accessed. Do *not* indiscriminately replace locking by
RCU patterns.

The "copy" part of RCU implies that, while updating, several copies of a given
object exist in parallel. Even after the updated copy is swapped in, the old
object remains queued for freeing until all other threads are guaranteed to
not be accessing it anymore, due to passing a sequence point. In addition to
the increased memory usage, there may be some bursted (due to batching) malloc
contention when the RCU cleanup thread does its thing and frees memory.

Other useful patterns
^^^^^^^^^^^^^^^^^^^^^

In addition to the full "copy object, apply changes, atomically update"
approach, there are 2 "reduced" usage cases that can be done:

* atomically updating single pieces of a particular object, e.g. some flags
  or configuration piece

* straight up read-only / immutable objects

Both of these cases can be considered RCU "subsets". For example, when
maintaining an atomic list of items, but these items only have a single
integer value that needs to be updated, that value can be atomically updated
without copying the entire object. However, the object still needs to be
free'd through :c:func:`rcu_free()` since reading/updating and deleting might
be happening concurrently. The same applies for immutable objects; deletion
might still race with reading so they need to be free'd through RCU.
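
As a sketch of the first "reduced" case, assume a hypothetical ``struct item``
(with an illustrative ``MTYPE_ITEM``) whose single flags word is updated in
place rather than through a full copy::

   struct item {
           _Atomic uint32_t flags;         /* updated in place, no copy */
           struct rcu_head rcu;
   };

   void item_set_flag(struct item *item, uint32_t flag)
   {
           /* single-field update; no private copy is needed */
           atomic_fetch_or_explicit(&item->flags, flag,
                                    memory_order_relaxed);
   }

   void item_del(struct item *item)
   {
           /* deletion still has to go through RCU, since readers
            * may be holding a pointer to item */
           rcu_free(MTYPE_ITEM, item, rcu);
   }
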
FRR API
-------

Before diving into detail on the provided functions, it is important to note
that the FRR RCU API covers the **cleanup part of RCU, not the read-copy-update
paradigm itself**. These parts are handled by standard C11 atomic operations,
and by extension through the atomic data structures (ATOMLIST, ATOMSORT & co.)

The ``rcu_*`` functions only make sense in conjunction with these RCU access
patterns. If you're calling the RCU API but not using these, something is
wrong. The other way around is not necessarily true; it is possible to use
atomic ops & datastructures with other types of locking, e.g. rwlocks.

.. c:function:: void rcu_read_lock()
.. c:function:: void rcu_read_unlock()

   These functions acquire / release the RCU read-side lock. All access to
   RCU-guarded data must be inside a block guarded by these. Any number of
   threads may hold the RCU read-side lock at a given point in time, including
   both no threads at all and all threads.

   The functions implement a depth counter, i.e. can be nested. The nested
   calls are cheap, since they only increment/decrement the counter.
   Therefore, any place that uses RCU data and doesn't have a guarantee that
   the caller holds RCU (e.g. ``lib/`` code) should just have its own
   rcu_read_lock/rcu_read_unlock pair.

   At the "root" level (e.g. un-nested), these calls can incur the cost of one
   syscall (to ``futex()``). That puts them on about the same cost as a
   mutex lock/unlock.

   The ``thread_master`` code currently always holds RCU everywhere, except
   while doing the actual ``poll()`` syscall. This is both an optimization as
   well as an "easement" into getting RCU going. The current implementation
   contract is that any ``struct thread *`` callback is called with a RCU
   holding depth of 1, and that this is owned by the thread so it may (should)
   drop and reacquire it when doing some longer-running work.

   .. warning::

      The RCU read-side lock must be held **continuously** for the entire time
      any piece of RCU data is used. This includes any access to RCU data
      after the initial ``atomic_load``. If the RCU read-side lock is
      released, any RCU-protected pointers as well as the data they refer to
      become invalid, as another thread may have called :c:func:`rcu_free` on
      them.
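
For illustration, a read-side access following these rules might look like
this, reusing the hypothetical ``global_config`` pointer from the sketch in
the TL;DR section::

   uint32_t config_get_interval(void)
   {
           struct config *cfg;
           uint32_t interval;

           rcu_read_lock();
           cfg = atomic_load_explicit(&global_config,
                                      memory_order_acquire);
           /* cfg is only guaranteed valid while the read-side
            * lock is held */
           interval = cfg ? cfg->refresh_interval : 0;
           rcu_read_unlock();

           /* cfg must not be dereferenced anymore at this point */
           return interval;
   }
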
.. c:type:: struct rcu_head
.. c:type:: struct rcu_head_close
.. c:type:: struct rcu_action

   The ``rcu_head`` structures are small (16-byte) bits that contain the
   queueing machinery for the RCU sweeper/cleanup mechanisms.

   Any piece of data that is cleaned up by RCU needs to have a matching
   ``rcu_head`` embedded in it. If there is more than one cleanup operation
   to be done (e.g. closing a file descriptor), more than one ``rcu_head`` may
   be embedded.

   .. warning::

      It is not possible to reuse a ``rcu_head``. It is owned by the RCU code
      as soon as ``rcu_*`` is called on it.

   The ``_close`` variant carries an extra ``int fd`` field to store the fd to
   be closed.

   To minimize the amount of memory used for ``rcu_head``, details about the
   RCU operation to be performed are moved into the ``rcu_action`` structure.
   It contains e.g. the MTYPE for :c:func:`rcu_free` calls. The pointer to be
   freed is stored as an offset relative to the ``rcu_head``, which means it
   must be embedded as a struct field so the offset is constant.

   The ``rcu_action`` structure is an implementation detail. Using
   ``rcu_free`` or ``rcu_close`` will set it up correctly without further
   code needed.

   The ``rcu_head`` may be put in an union with other data if the other data
   is only used during "life" of the data, since the ``rcu_head`` is used only
   for the "death" of data. But note that other threads may still be reading
   a piece of data while a thread is working to free it.
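
As an illustration of embedding more than one head, a hypothetical connection
object that needs both its memory freed and its file descriptor closed could
carry one head per cleanup operation (the struct and field names are made up
for this example)::

   struct conn {
           int fd;
           /* ... other connection state ... */

           struct rcu_head rcu_mem;        /* used by rcu_free() */
           struct rcu_head_close rcu_fd;   /* used by rcu_close() */
   };
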
.. c:function:: void rcu_free(struct memtype *mtype, struct X *ptr, field)

   Free a block of memory after RCU has ensured no other thread can be
   accessing it anymore. The pointer remains valid for any other thread that
   has called :c:func:`rcu_read_lock` before the ``rcu_free`` call.

   .. warning::

      In some other RCU implementations, the pointer remains valid to the
      *calling* thread if it is holding the RCU read-side lock. This is not
      the case in FRR, particularly when running single-threaded. Enforcing
      this rule also allows static analysis to find use-after-free issues.

   ``mtype`` is the libfrr ``MTYPE_FOO`` allocation type to pass to
   :c:func:`XFREE`.

   ``field`` must be the name of a ``struct rcu_head`` member field in ``ptr``.
   The offset of this field (which must be constant) is used to reduce the
   memory size of ``struct rcu_head``.

   .. note::

      ``rcu_free`` (and ``rcu_close``) calls are more efficient if they are
      put close to each other. When freeing several RCU'd resources, try to
      move the calls next to each other (even if the data structures do not
      directly point to each other.)

      Having the calls bundled reduces the cost of adding the ``rcu_head`` to
      the RCU queue; the RCU queue is an atomic data structure whose usage
      will require the CPU to acquire an exclusive hold on relevant cache
      lines.

.. c:function:: void rcu_close(struct rcu_head_close *head, int fd)

   Close a file descriptor after ensuring no other thread might be using it
   anymore. Same as :c:func:`rcu_free`, except it calls ``close`` instead of
   ``free``.
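
Continuing the hypothetical ``struct conn`` from above (with an illustrative
``MTYPE_CONN``), a teardown path would queue both cleanup operations back to
back, following the bundling advice in the note on ``rcu_free``::

   void conn_del(struct conn *conn)
   {
           /* the object must already be unpublished, e.g. removed
            * from the atomic list it was on; then queue both
            * cleanup operations adjacently */
           rcu_close(&conn->rcu_fd, conn->fd);
           rcu_free(MTYPE_CONN, conn, rcu_mem);
   }
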
Internals
^^^^^^^^^

.. c:type:: struct rcu_thread

   Per-thread state maintained by the RCU code, set up by the following
   functions. A pointer to a thread's own ``rcu_thread`` is saved in
   thread-local storage.

.. c:function:: struct rcu_thread *rcu_thread_prepare(void)
.. c:function:: void rcu_thread_unprepare(struct rcu_thread *rcu_thread)
.. c:function:: void rcu_thread_start(struct rcu_thread *rcu_thread)

   Since the RCU code needs to have a list of all active threads, these
   functions are used by the ``frr_pthread`` code to set up threads. Teardown
   is automatic. It should not be necessary to call these functions.

   Any thread that accesses RCU-protected data needs to be registered with
   these functions. Threads that do not access RCU-protected data may call
   these functions but do not need to.

   Note that passing a pointer to RCU-protected data to some library which
   accesses that pointer makes the library "access RCU-protected data". In
   that case, either all of the library's threads must be registered for RCU,
   or the code must instead pass a (non-RCU) copy of the data to the library.

.. c:function:: void rcu_shutdown(void)

   Stop the RCU sweeper thread and make sure all cleanup has finished.

   This function is called on daemon exit by the libfrr code to ensure pending
   RCU operations are completed. This is mostly to get a clean exit without
   memory leaks from queued RCU operations. It should not be necessary to
   call this function as libfrr handles this.

244+
FRR specifics and implementation details
245+
----------------------------------------
246+
247+
The FRR RCU infrastructure has the following characteristics:
248+
249+
* it is Epoch-based with a 32-bit wrapping counter. (This is somewhat
250+
different from other Epoch-based approaches which may be designed to only
251+
use 3 counter values, but works out to a simple implementation.)
252+
253+
* instead of tracking CPUs as the Linux kernel does, threads are tracked. This
254+
has exactly zero semantic impact, RCU just cares about "threads of
255+
execution", which the kernel can optimize to CPUs but we can't. But it
256+
really boils down to the same thing.
257+
258+
* there are no ``rcu_dereference`` and ``rcu_assign_pointer`` - use
259+
``atomic_load`` and ``atomic_store`` instead. (These didn't exist when the
260+
Linux RCU code was created.)
261+
262+
* there is no ``synchronize_rcu``; this is a design choice but may be revisited
263+
at a later point. ``synchronize_rcu`` blocks a thread until it is guaranteed
264+
that no other threads might still be accessing data structures that they may
265+
have access to at the beginning of the function call. This is a blocking
266+
design and probably not appropriate for FRR. Instead, ``rcu_call`` can be
267+
used to have the RCU sweeper thread make a callback after the same constraint
268+
is fulfilled in an asynchronous way. Most needs should be covered by
269+
``rcu_free`` and ``rcu_close``.
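
For readers coming from the Linux kernel API, a rough equivalence sketch in
plain C11 atomics (``gp`` being some RCU-protected pointer; this sketch
assumes ``memory_order_acquire`` as the conservative stand-in for the kernel's
consume-style ``rcu_dereference``)::

   #include <stdatomic.h>

   struct data;
   static struct data *_Atomic gp;

   /* Linux: rcu_assign_pointer(gp, p); */
   static void publish(struct data *p)
   {
           atomic_store_explicit(&gp, p, memory_order_release);
   }

   /* Linux: p = rcu_dereference(gp); */
   static struct data *fetch(void)
   {
           return atomic_load_explicit(&gp, memory_order_acquire);
   }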

doc/developer/subdir.am

+1
@@ -42,6 +42,7 @@ dev_RSTFILES = \
         doc/developer/packaging-debian.rst \
         doc/developer/packaging-redhat.rst \
         doc/developer/packaging.rst \
+        doc/developer/rcu.rst \
         doc/developer/testing.rst \
         doc/developer/topotests-snippets.rst \
         doc/developer/topotests.rst \
