Skip to content

Commit 36bbef5

Browse files
borkmann authored and davem330 committed
bpf: direct packet write and access for helpers for clsact progs
This work implements direct packet access for helpers and direct packet write in a similar fashion to what is already available for XDP types via commits 4acf6c0 ("bpf: enable direct packet data write for xdp progs") and 6841de8 ("bpf: allow helpers access the packet directly"), and as a complementary feature to the already available direct packet read for tc (cls/act) programs. To enable this, we need to introduce two helpers, bpf_skb_pull_data() and bpf_csum_update(). The first is generally needed for both read and write, because they would otherwise be limited to the current linear skb head. Usually, when the data_end test fails, programs just bail out, or, in the direct read case, use bpf_skb_load_bytes() as an alternative to overcome this limitation. If such data sits in non-linear parts, we can just pull them in once with the new helper, retest and eventually access them. At the same time, this also makes sure the skb is uncloned, which is, of course, a necessary condition for direct write. As this needs to be an invariant for the write part only, the verifier detects writes and adds a prologue that calls bpf_skb_pull_data() to effectively unclone the skb from the very beginning in case it is indeed cloned. The heuristic makes use of a similar trick to the one used in 233577a ("net: filter: constify detection of pkt_type_offset"). This comes at zero cost for other programs that do not use the direct write feature. Should a program use this feature only sparsely and have read access for the most part with, for example, drop return codes, then such a write action can be delegated to a tail called program, deferring the cost of potential uncloning to a later point in time, where it would have been paid with bpf_skb_store_bytes() as well.
The advantage of direct write is that the writes are inlined, whereas the helper cannot make any length assumptions and thus needs to generate a call to memcpy() even for small sizes; the cost of the helper call itself, with its sanity checks, is avoided as well. Plus, when direct read is already used, we don't need to cache or perform rechecks on the data boundaries (due to the verifier invalidating previous checks for helpers that change skb->data), so more complex programs using rewrites can benefit from switching to direct read plus write. For direct packet access to helpers, we save the otherwise needed copy into a temp struct sitting on stack memory when the use-case allows. Both facilities are enabled via may_access_direct_pkt_data() in the verifier. For now, we limit this to map helpers and csum_diff, and can successively enable other helpers where we find it makes sense. Helpers that definitely cannot be allowed for this are those that are part of bpf_helper_changes_skb_data(), since they can change the underlying data, and those that write into memory, as this could happen for packet typed args when still cloned. The bpf_csum_update() helper accounts for the fact that we need to fix up checksum_complete when using direct write instead of bpf_skb_store_bytes(), meaning that programs can use available helpers like bpf_csum_diff(), and implement csum_add(), csum_sub(), csum_block_add(), csum_block_sub() equivalents in eBPF together with the new helper. A usage example will be provided for iproute2's examples/bpf/ directory.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent b399cf6 commit 36bbef5

File tree

6 files changed

+196
-34
lines changed

6 files changed

+196
-34
lines changed

include/linux/bpf.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ enum bpf_return_type {
9696
struct bpf_func_proto {
9797
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
9898
bool gpl_only;
99+
bool pkt_access;
99100
enum bpf_return_type ret_type;
100101
enum bpf_arg_type arg1_type;
101102
enum bpf_arg_type arg2_type;
@@ -151,7 +152,8 @@ struct bpf_verifier_ops {
151152
*/
152153
bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
153154
enum bpf_reg_type *reg_type);
154-
155+
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
156+
const struct bpf_prog *prog);
155157
u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
156158
int src_reg, int ctx_off,
157159
struct bpf_insn *insn, struct bpf_prog *prog);

include/linux/skbuff.h

+12-2
Original file line numberDiff line numberDiff line change
@@ -676,13 +676,23 @@ struct sk_buff {
676676
*/
677677
kmemcheck_bitfield_begin(flags1);
678678
__u16 queue_mapping;
679+
680+
/* if you move cloned around you also must adapt those constants */
681+
#ifdef __BIG_ENDIAN_BITFIELD
682+
#define CLONED_MASK (1 << 7)
683+
#else
684+
#define CLONED_MASK 1
685+
#endif
686+
#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset)
687+
688+
__u8 __cloned_offset[0];
679689
__u8 cloned:1,
680690
nohdr:1,
681691
fclone:2,
682692
peeked:1,
683693
head_frag:1,
684-
xmit_more:1;
685-
/* one bit hole */
694+
xmit_more:1,
695+
__unused:1; /* one bit hole */
686696
kmemcheck_bitfield_end(flags1);
687697

688698
/* fields enclosed in headers_start/headers_end are copied

include/uapi/linux/bpf.h

+21
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,27 @@ enum bpf_func_id {
398398
*/
399399
BPF_FUNC_skb_change_tail,
400400

401+
/**
402+
* bpf_skb_pull_data(skb, len)
403+
* The helper will pull in non-linear data in case the
404+
* skb is non-linear and not all of len are part of the
405+
* linear section. Only needed for read/write with direct
406+
* packet access.
407+
* @skb: pointer to skb
408+
* @len: len to make read/writeable
409+
* Return: 0 on success or negative error
410+
*/
411+
BPF_FUNC_skb_pull_data,
412+
413+
/**
414+
* bpf_csum_update(skb, csum)
415+
* Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
416+
* @skb: pointer to skb
417+
* @csum: csum to add
418+
* Return: csum on success or negative error
419+
*/
420+
BPF_FUNC_csum_update,
421+
401422
__BPF_FUNC_MAX_ID,
402423
};
403424

kernel/bpf/helpers.c

+3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
3636
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
3737
.func = bpf_map_lookup_elem,
3838
.gpl_only = false,
39+
.pkt_access = true,
3940
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
4041
.arg1_type = ARG_CONST_MAP_PTR,
4142
.arg2_type = ARG_PTR_TO_MAP_KEY,
@@ -51,6 +52,7 @@ BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
5152
const struct bpf_func_proto bpf_map_update_elem_proto = {
5253
.func = bpf_map_update_elem,
5354
.gpl_only = false,
55+
.pkt_access = true,
5456
.ret_type = RET_INTEGER,
5557
.arg1_type = ARG_CONST_MAP_PTR,
5658
.arg2_type = ARG_PTR_TO_MAP_KEY,
@@ -67,6 +69,7 @@ BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
6769
const struct bpf_func_proto bpf_map_delete_elem_proto = {
6870
.func = bpf_map_delete_elem,
6971
.gpl_only = false,
72+
.pkt_access = true,
7073
.ret_type = RET_INTEGER,
7174
.arg1_type = ARG_CONST_MAP_PTR,
7275
.arg2_type = ARG_PTR_TO_MAP_KEY,

kernel/bpf/verifier.c

+40-14
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ struct verifier_env {
196196
u32 used_map_cnt; /* number of used maps */
197197
u32 id_gen; /* used to generate unique reg IDs */
198198
bool allow_ptr_leaks;
199+
bool seen_direct_write;
199200
};
200201

201202
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
@@ -204,6 +205,7 @@ struct verifier_env {
204205
struct bpf_call_arg_meta {
205206
struct bpf_map *map_ptr;
206207
bool raw_mode;
208+
bool pkt_access;
207209
int regno;
208210
int access_size;
209211
};
@@ -654,10 +656,17 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
654656

655657
#define MAX_PACKET_OFF 0xffff
656658

657-
static bool may_write_pkt_data(enum bpf_prog_type type)
659+
static bool may_access_direct_pkt_data(struct verifier_env *env,
660+
const struct bpf_call_arg_meta *meta)
658661
{
659-
switch (type) {
662+
switch (env->prog->type) {
663+
case BPF_PROG_TYPE_SCHED_CLS:
664+
case BPF_PROG_TYPE_SCHED_ACT:
660665
case BPF_PROG_TYPE_XDP:
666+
if (meta)
667+
return meta->pkt_access;
668+
669+
env->seen_direct_write = true;
661670
return true;
662671
default:
663672
return false;
@@ -817,7 +826,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
817826
err = check_stack_read(state, off, size, value_regno);
818827
}
819828
} else if (state->regs[regno].type == PTR_TO_PACKET) {
820-
if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) {
829+
if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
821830
verbose("cannot write into packet\n");
822831
return -EACCES;
823832
}
@@ -950,8 +959,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
950959
return 0;
951960
}
952961

953-
if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) {
954-
verbose("helper access to the packet is not allowed for clsact\n");
962+
if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
963+
verbose("helper access to the packet is not allowed\n");
955964
return -EACCES;
956965
}
957966

@@ -1191,6 +1200,7 @@ static int check_call(struct verifier_env *env, int func_id)
11911200
changes_data = bpf_helper_changes_skb_data(fn->func);
11921201

11931202
memset(&meta, 0, sizeof(meta));
1203+
meta.pkt_access = fn->pkt_access;
11941204

11951205
/* We only support one arg being in raw mode at the moment, which
11961206
* is sufficient for the helper functions we have right now.
@@ -2675,18 +2685,35 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
26752685
*/
26762686
static int convert_ctx_accesses(struct verifier_env *env)
26772687
{
2678-
struct bpf_insn *insn = env->prog->insnsi;
2679-
int insn_cnt = env->prog->len;
2680-
struct bpf_insn insn_buf[16];
2688+
const struct bpf_verifier_ops *ops = env->prog->aux->ops;
2689+
struct bpf_insn insn_buf[16], *insn;
26812690
struct bpf_prog *new_prog;
26822691
enum bpf_access_type type;
2683-
int i;
2692+
int i, insn_cnt, cnt;
26842693

2685-
if (!env->prog->aux->ops->convert_ctx_access)
2694+
if (ops->gen_prologue) {
2695+
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
2696+
env->prog);
2697+
if (cnt >= ARRAY_SIZE(insn_buf)) {
2698+
verbose("bpf verifier is misconfigured\n");
2699+
return -EINVAL;
2700+
} else if (cnt) {
2701+
new_prog = bpf_patch_insn_single(env->prog, 0,
2702+
insn_buf, cnt);
2703+
if (!new_prog)
2704+
return -ENOMEM;
2705+
env->prog = new_prog;
2706+
}
2707+
}
2708+
2709+
if (!ops->convert_ctx_access)
26862710
return 0;
26872711

2712+
insn_cnt = env->prog->len;
2713+
insn = env->prog->insnsi;
2714+
26882715
for (i = 0; i < insn_cnt; i++, insn++) {
2689-
u32 insn_delta, cnt;
2716+
u32 insn_delta;
26902717

26912718
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
26922719
insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
@@ -2703,9 +2730,8 @@ static int convert_ctx_accesses(struct verifier_env *env)
27032730
continue;
27042731
}
27052732

2706-
cnt = env->prog->aux->ops->
2707-
convert_ctx_access(type, insn->dst_reg, insn->src_reg,
2708-
insn->off, insn_buf, env->prog);
2733+
cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
2734+
insn->off, insn_buf, env->prog);
27092735
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
27102736
verbose("bpf verifier is misconfigured\n");
27112737
return -EINVAL;

0 commit comments

Comments
 (0)