Skip to content

Commit 6fff607

Browse files
jrfastab authored and borkmann committed
bpf: sk_msg program helper bpf_msg_push_data
This allows the user to push data into a msg using sk_msg program types. The format is bpf_msg_push_data(msg, offset, len, flags), which inserts 'len' bytes at offset 'offset'. For example, to prepend 10 bytes at the front of the message the user can call bpf_msg_push_data(msg, 0, 10, 0). This invalidates the data bounds, so the BPF user has to recheck them after calling the helper. After the call the msg size has been updated and the user is free to write into the added bytes. We allow any offset/len as long as it is within the (data, data_end) range. However, a copy is required if the ring is full, and it's possible for the helper to fail with ENOMEM or EINVAL errors, which need to be handled by the BPF program. This can be used similarly to XDP metadata to pass data between the sk_msg layer and lower layers. Signed-off-by: John Fastabend <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 5032d07 commit 6fff607

File tree

3 files changed

+158
-1
lines changed

3 files changed

+158
-1
lines changed

include/linux/skmsg.h

+5
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
207207
return &msg->sg.data[which];
208208
}
209209

210+
static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which)
211+
{
212+
return msg->sg.data[which];
213+
}
214+
210215
static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
211216
{
212217
return sg_page(sk_msg_elem(msg, which));

include/uapi/linux/bpf.h

+19-1
Original file line numberDiff line numberDiff line change
@@ -2240,6 +2240,23 @@ union bpf_attr {
22402240
* pointer that was returned from bpf_sk_lookup_xxx\ ().
22412241
* Return
22422242
* 0 on success, or a negative error in case of failure.
2243+
*
2244+
* int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
2245+
* Description
2246+
* For socket policies, insert *len* bytes into msg at offset
2247+
* *start*.
2248+
*
2249+
* If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
2250+
* *msg* it may want to insert metadata or options into the msg.
2251+
* This can later be read and used by any of the lower layer BPF
2252+
* hooks.
2253+
*
2254+
* This helper may fail under memory pressure (when a page
2255+
* allocation fails); in that case the BPF program gets an
2256+
* appropriate error and needs to handle it.
2257+
*
2258+
* Return
2259+
* 0 on success, or a negative error in case of failure.
22432260
*/
22442261
#define __BPF_FUNC_MAPPER(FN) \
22452262
FN(unspec), \
@@ -2331,7 +2348,8 @@ union bpf_attr {
23312348
FN(sk_release), \
23322349
FN(map_push_elem), \
23332350
FN(map_pop_elem), \
2334-
FN(map_peek_elem),
2351+
FN(map_peek_elem), \
2352+
FN(msg_push_data),
23352353

23362354
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
23372355
* function eBPF program intends to call

net/core/filter.c

+134
Original file line numberDiff line numberDiff line change
@@ -2297,6 +2297,137 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = {
22972297
.arg4_type = ARG_ANYTHING,
22982298
};
22992299

2300+
/* bpf_msg_push_data() - insert @len bytes into @msg at byte offset @start.
 *
 * @msg:   sk_msg whose scatterlist ring receives the new bytes
 * @start: byte offset, counted from the first element of the message, at
 *         which the new bytes are inserted; must fall inside existing data
 * @len:   number of bytes to insert
 * @flags: must be zero
 *
 * A fresh page region is allocated for the inserted bytes and linked into
 * the scatterlist ring. If @start falls inside an element, that element is
 * split around the new region. When the ring does not have enough free
 * slots for that, the affected element is instead copied, together with
 * the gap for the new bytes, into the newly allocated region.
 *
 * The inserted bytes are not initialized here (the allocation does not
 * request zeroed memory); the caller is expected to write them.
 *
 * Return: 0 on success (including the @len == 0 no-op), -EINVAL if @flags
 * is non-zero, @start lies at or beyond the end of the message data, or
 * the requested size overflows, -ENOMEM if the page allocation fails.
 */
BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
	   u32, len, u64, flags)
{
	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
	u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
	u8 *raw, *to, *from;
	struct page *page;

	if (unlikely(flags))
		return -EINVAL;

	/* Nothing to insert: avoid calling get_order() with a size of 0 and
	 * leave the message untouched.
	 */
	if (unlikely(!len))
		return 0;

	/* First find the starting scatterlist element. 'offset' is the
	 * number of bytes in the elements *before* element i and 'l' is the
	 * length of element i, so 'offset + l' never counts an element
	 * twice, even when the loop runs off the end of the ring.
	 */
	i = msg->sg.start;
	do {
		offset += l;
		l = sk_msg_elem(msg, i)->length;

		if (start < offset + l)
			break;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);

	/* start is not inside any element: reject rather than operate on
	 * the slot at msg->sg.end, which is not part of the message.
	 */
	if (start >= offset + l)
		return -EINVAL;

	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);

	/* If no space available will fallback to copy, we need at
	 * least one scatterlist elem available to push data into
	 * when start aligns to the beginning of an element or two
	 * when it falls inside an element. We handle the start equals
	 * offset case because its the common case for inserting a
	 * header.
	 */
	if (!space || (space == 1 && start != offset))
		copy = msg->sg.data[i].length;

	/* Guard the u32 sum used for both the allocation size and the new
	 * element length; a wrapped value would under-allocate the buffer
	 * that the memcpy()s below write into.
	 */
	if (unlikely(copy + len < len))
		return -EINVAL;

	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
			   get_order(copy + len));
	if (unlikely(!page))
		return -ENOMEM;

	if (copy) {
		int front, back;

		raw = page_address(page);

		/* Rebuild element i in the new buffer as
		 * [front bytes][len new bytes][back bytes].
		 */
		psge = sk_msg_elem(msg, i);
		front = start - offset;
		back = psge->length - front;
		from = sg_virt(psge);

		if (front)
			memcpy(raw, from, front);

		if (back) {
			from += front;
			to = raw + front + len;

			memcpy(to, from, back);
		}

		/* The old page's bytes now live in the new buffer and slot i
		 * is re-pointed at the new page below, so drop the reference
		 * on the old page.
		 */
		put_page(sg_page(psge));
	} else if (start - offset) {
		/* Split element i: psge keeps the bytes before the insertion
		 * point, rsge describes the remainder of the same page.
		 */
		psge = sk_msg_elem(msg, i);
		rsge = sk_msg_elem_cpy(msg, i);

		psge->length = start - offset;
		rsge.length -= psge->length;
		/* Advance by the bytes kept in psge (element-relative), not
		 * by the message-relative 'start'.
		 */
		rsge.offset += psge->length;

		sk_msg_iter_var_next(i);
		sg_unmark_end(psge);
		sk_msg_iter_next(msg, end);
	}

	/* Slot(s) to place newly allocated data */
	new = i;

	/* Shift one or two slots as needed */
	if (!copy) {
		sge = sk_msg_elem_cpy(msg, i);

		sk_msg_iter_var_next(i);
		sg_unmark_end(&sge);
		sk_msg_iter_next(msg, end);

		nsge = sk_msg_elem_cpy(msg, i);
		if (rsge.length) {
			sk_msg_iter_var_next(i);
			nnsge = sk_msg_elem_cpy(msg, i);
		}

		/* Walk to the (already advanced) end of the ring, writing
		 * each saved entry into a later slot. sge/nsge/nnsge hold
		 * the entries read ahead of the write position so they are
		 * not lost when their slots are overwritten.
		 */
		while (i != msg->sg.end) {
			msg->sg.data[i] = sge;
			sge = nsge;
			sk_msg_iter_var_next(i);
			if (rsge.length) {
				nsge = nnsge;
				nnsge = sk_msg_elem_cpy(msg, i);
			} else {
				nsge = sk_msg_elem_cpy(msg, i);
			}
		}
	}

	/* Place newly allocated data buffer */
	sk_mem_charge(msg->sk, len);
	msg->sg.size += len;
	msg->sg.copy[new] = false;
	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
	if (rsge.length) {
		/* rsge refers to the same page as the element it was split
		 * from, so take an extra reference for the second user.
		 */
		get_page(sg_page(&rsge));
		sk_msg_iter_var_next(new);
		msg->sg.data[new] = rsge;
	}

	sk_msg_compute_data_pointers(msg);
	return 0;
}
2420+
2421+
static const struct bpf_func_proto bpf_msg_push_data_proto = {
2422+
.func = bpf_msg_push_data,
2423+
.gpl_only = false,
2424+
.ret_type = RET_INTEGER,
2425+
.arg1_type = ARG_PTR_TO_CTX,
2426+
.arg2_type = ARG_ANYTHING,
2427+
.arg3_type = ARG_ANYTHING,
2428+
.arg4_type = ARG_ANYTHING,
2429+
};
2430+
23002431
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
23012432
{
23022433
return task_get_classid(skb);
@@ -4854,6 +4985,7 @@ bool bpf_helper_changes_pkt_data(void *func)
48544985
func == bpf_xdp_adjust_head ||
48554986
func == bpf_xdp_adjust_meta ||
48564987
func == bpf_msg_pull_data ||
4988+
func == bpf_msg_push_data ||
48574989
func == bpf_xdp_adjust_tail ||
48584990
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
48594991
func == bpf_lwt_seg6_store_bytes ||
@@ -5130,6 +5262,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
51305262
return &bpf_msg_cork_bytes_proto;
51315263
case BPF_FUNC_msg_pull_data:
51325264
return &bpf_msg_pull_data_proto;
5265+
case BPF_FUNC_msg_push_data:
5266+
return &bpf_msg_push_data_proto;
51335267
case BPF_FUNC_get_local_storage:
51345268
return &bpf_get_local_storage_proto;
51355269
default:

0 commit comments

Comments
 (0)