Skip to content

Commit d58e468

Browse files
Petar Penkov, Alexei Starovoitov
Petar Penkov
authored and
Alexei Starovoitov
committed
flow_dissector: implements flow dissector BPF hook
Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector path. The BPF program is attached per network namespace.

Signed-off-by: Petar Penkov <[email protected]>
Signed-off-by: Willem de Bruijn <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 1edb6e0 commit d58e468

File tree

10 files changed

+291
-3
lines changed

10 files changed

+291
-3
lines changed

include/linux/bpf.h

+1
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ enum bpf_reg_type {
212212
PTR_TO_PACKET_META, /* skb->data - meta_len */
213213
PTR_TO_PACKET, /* reg points to skb->data */
214214
PTR_TO_PACKET_END, /* skb->data + headlen */
215+
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
215216
};
216217

217218
/* The information passed from prog-specific *_is_valid_access

include/linux/bpf_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
3232
#ifdef CONFIG_INET
3333
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
3434
#endif
35+
BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
3536

3637
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
3738
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)

include/linux/skbuff.h

+7
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ struct scatterlist;
243243
struct pipe_inode_info;
244244
struct iov_iter;
245245
struct napi_struct;
246+
struct bpf_prog;
247+
union bpf_attr;
246248

247249
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
248250
struct nf_conntrack {
@@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
11921194
const struct flow_dissector_key *key,
11931195
unsigned int key_count);
11941196

1197+
/*
 * Attach entry point for flow dissector BPF programs.  bpf_prog_attach()
 * dispatches here for BPF_PROG_TYPE_FLOW_DISSECTOR and returns this
 * function's result to its caller.
 * NOTE(review): the definition is not part of this header hunk; the exact
 * error codes and reference handling of @prog should be confirmed there.
 */
int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
1198+
struct bpf_prog *prog);
1199+
1200+
/*
 * Detach entry point for flow dissector BPF programs.  bpf_prog_detach()
 * returns this function's result directly for attach type
 * BPF_FLOW_DISSECTOR.
 */
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
1201+
11951202
bool __skb_flow_dissect(const struct sk_buff *skb,
11961203
struct flow_dissector *flow_dissector,
11971204
void *target_container,

include/net/net_namespace.h

+3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct ctl_table_header;
4343
struct net_generic;
4444
struct uevent_sock;
4545
struct netns_ipvs;
46+
struct bpf_prog;
4647

4748

4849
#define NETDEV_HASHBITS 8
@@ -145,6 +146,8 @@ struct net {
145146
#endif
146147
struct net_generic __rcu *gen;
147148

149+
struct bpf_prog __rcu *flow_dissector_prog;
150+
148151
/* Note : following structs are cache line aligned */
149152
#ifdef CONFIG_XFRM
150153
struct netns_xfrm xfrm;

include/net/sch_generic.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct Qdisc_ops;
1919
struct qdisc_walker;
2020
struct tcf_walker;
2121
struct module;
22+
struct bpf_flow_keys;
2223

2324
typedef int tc_setup_cb_t(enum tc_setup_type type,
2425
void *type_data, void *cb_priv);
@@ -307,9 +308,14 @@ struct tcf_proto {
307308
};
308309

309310
struct qdisc_skb_cb {
310-
unsigned int pkt_len;
311-
u16 slave_dev_queue_mapping;
312-
u16 tc_classid;
311+
union {
312+
struct {
313+
unsigned int pkt_len;
314+
u16 slave_dev_queue_mapping;
315+
u16 tc_classid;
316+
};
317+
struct bpf_flow_keys *flow_keys;
318+
};
313319
#define QDISC_CB_PRIV_LEN 20
314320
unsigned char data[QDISC_CB_PRIV_LEN];
315321
};

include/uapi/linux/bpf.h

+26
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ enum bpf_prog_type {
152152
BPF_PROG_TYPE_LWT_SEG6LOCAL,
153153
BPF_PROG_TYPE_LIRC_MODE2,
154154
BPF_PROG_TYPE_SK_REUSEPORT,
155+
BPF_PROG_TYPE_FLOW_DISSECTOR,
155156
};
156157

157158
enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
172173
BPF_CGROUP_UDP4_SENDMSG,
173174
BPF_CGROUP_UDP6_SENDMSG,
174175
BPF_LIRC_MODE2,
176+
BPF_FLOW_DISSECTOR,
175177
__MAX_BPF_ATTACH_TYPE
176178
};
177179

@@ -2333,6 +2335,7 @@ struct __sk_buff {
23332335
/* ... here. */
23342336

23352337
__u32 data_meta;
2338+
struct bpf_flow_keys *flow_keys;
23362339
};
23372340

23382341
struct bpf_tunnel_key {
@@ -2778,4 +2781,27 @@ enum bpf_task_fd_type {
27782781
BPF_FD_TYPE_URETPROBE, /* filename + offset */
27792782
};
27802783

2784+
/*
 * Flow key record made visible to BPF_PROG_TYPE_FLOW_DISSECTOR programs
 * through the __sk_buff 'flow_keys' pointer.  The verifier bounds every
 * access made through that pointer to sizeof(struct bpf_flow_keys).
 *
 * The trailing anonymous union overlays the IPv4 and IPv6 address pairs;
 * addr_proto records which ETH_P_* family the stored addresses belong to.
 * NOTE(review): nhoff/thoff are presumably the network and transport
 * header offsets, as in the in-kernel flow dissector - confirm.
 */
struct bpf_flow_keys {
2785+
__u16 nhoff;
2786+
__u16 thoff;
2787+
__u16 addr_proto; /* ETH_P_* of valid addrs */
2788+
__u8 is_frag;
2789+
__u8 is_first_frag;
2790+
__u8 is_encap;
2791+
__u8 ip_proto;
2792+
__be16 n_proto;
2793+
__be16 sport;
2794+
__be16 dport;
2795+
union {
2796+
struct {
2797+
__be32 ipv4_src;
2798+
__be32 ipv4_dst;
2799+
};
2800+
struct {
2801+
__u32 ipv6_src[4]; /* in6_addr; network order */
2802+
__u32 ipv6_dst[4]; /* in6_addr; network order */
2803+
};
2804+
};
2805+
};
2806+
27812807
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/syscall.c

+8
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
16151615
case BPF_LIRC_MODE2:
16161616
ptype = BPF_PROG_TYPE_LIRC_MODE2;
16171617
break;
1618+
case BPF_FLOW_DISSECTOR:
1619+
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
1620+
break;
16181621
default:
16191622
return -EINVAL;
16201623
}
@@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
16361639
case BPF_PROG_TYPE_LIRC_MODE2:
16371640
ret = lirc_prog_attach(attr, prog);
16381641
break;
1642+
case BPF_PROG_TYPE_FLOW_DISSECTOR:
1643+
ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
1644+
break;
16391645
default:
16401646
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
16411647
}
@@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
16881694
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
16891695
case BPF_LIRC_MODE2:
16901696
return lirc_prog_detach(attr);
1697+
case BPF_FLOW_DISSECTOR:
1698+
return skb_flow_dissector_bpf_prog_detach(attr);
16911699
default:
16921700
return -EINVAL;
16931701
}

kernel/bpf/verifier.c

+32
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ static const char * const reg_type_str[] = {
261261
[PTR_TO_PACKET] = "pkt",
262262
[PTR_TO_PACKET_META] = "pkt_meta",
263263
[PTR_TO_PACKET_END] = "pkt_end",
264+
[PTR_TO_FLOW_KEYS] = "flow_keys",
264265
};
265266

266267
static char slot_type_char[] = {
@@ -965,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
965966
case PTR_TO_PACKET:
966967
case PTR_TO_PACKET_META:
967968
case PTR_TO_PACKET_END:
969+
case PTR_TO_FLOW_KEYS:
968970
case CONST_PTR_TO_MAP:
969971
return true;
970972
default:
@@ -1238,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
12381240
case BPF_PROG_TYPE_LWT_XMIT:
12391241
case BPF_PROG_TYPE_SK_SKB:
12401242
case BPF_PROG_TYPE_SK_MSG:
1243+
case BPF_PROG_TYPE_FLOW_DISSECTOR:
12411244
if (meta)
12421245
return meta->pkt_access;
12431246

@@ -1321,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
13211324
return -EACCES;
13221325
}
13231326

1327+
/*
 * Bounds-check an access made through a PTR_TO_FLOW_KEYS register.
 *
 * @env:  verifier environment, used only for the log message
 * @off:  byte offset of the access from the start of struct bpf_flow_keys
 * @size: number of bytes accessed
 *
 * Returns 0 when [off, off + size) lies entirely inside
 * struct bpf_flow_keys, otherwise logs the offending off/size and returns
 * -EACCES.  A zero-sized access at an in-range offset is accepted.
 */
static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1328+
int size)
1329+
{
1330+
/* Negative values are rejected first; the sum is then formed in u64 so
 * that off + size cannot wrap in int arithmetic before the comparison.
 */
if (size < 0 || off < 0 ||
1331+
(u64)off + size > sizeof(struct bpf_flow_keys)) {
1332+
verbose(env, "invalid access to flow keys off=%d size=%d\n",
1333+
off, size);
1334+
return -EACCES;
1335+
}
1336+
return 0;
1337+
}
1338+
13241339
static bool __is_pointer_value(bool allow_ptr_leaks,
13251340
const struct bpf_reg_state *reg)
13261341
{
@@ -1422,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
14221437
* right in front, treat it the very same way.
14231438
*/
14241439
return check_pkt_ptr_alignment(env, reg, off, size, strict);
1440+
case PTR_TO_FLOW_KEYS:
1441+
pointer_desc = "flow keys ";
1442+
break;
14251443
case PTR_TO_MAP_VALUE:
14261444
pointer_desc = "value ";
14271445
break;
@@ -1692,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
16921710
err = check_packet_access(env, regno, off, size, false);
16931711
if (!err && t == BPF_READ && value_regno >= 0)
16941712
mark_reg_unknown(env, regs, value_regno);
1713+
} else if (reg->type == PTR_TO_FLOW_KEYS) {
1714+
if (t == BPF_WRITE && value_regno >= 0 &&
1715+
is_pointer_value(env, value_regno)) {
1716+
verbose(env, "R%d leaks addr into flow keys\n",
1717+
value_regno);
1718+
return -EACCES;
1719+
}
1720+
1721+
err = check_flow_keys_access(env, off, size);
1722+
if (!err && t == BPF_READ && value_regno >= 0)
1723+
mark_reg_unknown(env, regs, value_regno);
16951724
} else {
16961725
verbose(env, "R%d invalid mem access '%s'\n", regno,
16971726
reg_type_str[reg->type]);
@@ -1839,6 +1868,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
18391868
case PTR_TO_PACKET_META:
18401869
return check_packet_access(env, regno, reg->off, access_size,
18411870
zero_size_allowed);
1871+
case PTR_TO_FLOW_KEYS:
1872+
return check_flow_keys_access(env, reg->off, access_size);
18421873
case PTR_TO_MAP_VALUE:
18431874
return check_map_access(env, regno, reg->off, access_size,
18441875
zero_size_allowed);
@@ -4366,6 +4397,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
43664397
case PTR_TO_CTX:
43674398
case CONST_PTR_TO_MAP:
43684399
case PTR_TO_PACKET_END:
4400+
case PTR_TO_FLOW_KEYS:
43694401
/* Only valid matches are exact, which memcmp() above
43704402
* would have accepted
43714403
*/

net/core/filter.c

+70
Original file line numberDiff line numberDiff line change
@@ -5123,6 +5123,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
51235123
}
51245124
}
51255125

5126+
/*
 * Helper lookup for flow dissector programs.
 *
 * Only bpf_skb_load_bytes is offered explicitly; every other helper id is
 * resolved by bpf_base_func_proto(), whose result is returned unchanged.
 * @prog is not consulted.
 */
static const struct bpf_func_proto *
5127+
flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5128+
{
5129+
switch (func_id) {
5130+
case BPF_FUNC_skb_load_bytes:
5131+
return &bpf_skb_load_bytes_proto;
5132+
default:
5133+
return bpf_base_func_proto(func_id);
5134+
}
5135+
}
5136+
51265137
static const struct bpf_func_proto *
51275138
lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
51285139
{
@@ -5241,6 +5252,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
52415252
if (size != size_default)
52425253
return false;
52435254
break;
5255+
case bpf_ctx_range(struct __sk_buff, flow_keys):
5256+
if (size != sizeof(struct bpf_flow_keys *))
5257+
return false;
5258+
break;
52445259
default:
52455260
/* Only narrow read access allowed for now. */
52465261
if (type == BPF_WRITE) {
@@ -5266,6 +5281,7 @@ static bool sk_filter_is_valid_access(int off, int size,
52665281
case bpf_ctx_range(struct __sk_buff, data):
52675282
case bpf_ctx_range(struct __sk_buff, data_meta):
52685283
case bpf_ctx_range(struct __sk_buff, data_end):
5284+
case bpf_ctx_range(struct __sk_buff, flow_keys):
52695285
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
52705286
return false;
52715287
}
@@ -5291,6 +5307,7 @@ static bool lwt_is_valid_access(int off, int size,
52915307
case bpf_ctx_range(struct __sk_buff, tc_classid):
52925308
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
52935309
case bpf_ctx_range(struct __sk_buff, data_meta):
5310+
case bpf_ctx_range(struct __sk_buff, flow_keys):
52945311
return false;
52955312
}
52965313

@@ -5501,6 +5518,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
55015518
case bpf_ctx_range(struct __sk_buff, data_end):
55025519
info->reg_type = PTR_TO_PACKET_END;
55035520
break;
5521+
case bpf_ctx_range(struct __sk_buff, flow_keys):
55045522
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
55055523
return false;
55065524
}
@@ -5702,6 +5720,7 @@ static bool sk_skb_is_valid_access(int off, int size,
57025720
switch (off) {
57035721
case bpf_ctx_range(struct __sk_buff, tc_classid):
57045722
case bpf_ctx_range(struct __sk_buff, data_meta):
5723+
case bpf_ctx_range(struct __sk_buff, flow_keys):
57055724
return false;
57065725
}
57075726

@@ -5761,6 +5780,39 @@ static bool sk_msg_is_valid_access(int off, int size,
57615780
return true;
57625781
}
57635782

5783+
/*
 * Context (struct __sk_buff) access filter for flow dissector programs.
 *
 * @off/@size: offset and width of the context access being verified
 * @type:      BPF_READ or BPF_WRITE
 * @prog:      program being verified, forwarded to the generic skb check
 * @info:      out-parameter; reg_type is set for the pointer-typed fields
 *
 * Returns false for any disallowed access; otherwise returns whatever
 * bpf_skb_is_valid_access() decides for the same arguments.
 */
static bool flow_dissector_is_valid_access(int off, int size,
5784+
enum bpf_access_type type,
5785+
const struct bpf_prog *prog,
5786+
struct bpf_insn_access_aux *info)
5787+
{
5788+
/* Writes are only tolerated inside cb[0]..cb[4]; all else is read-only. */
if (type == BPF_WRITE) {
5789+
switch (off) {
5790+
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
5791+
break;
5792+
default:
5793+
return false;
5794+
}
5795+
}
5796+
5797+
/* Tag loads of the pointer fields with the register type the verifier
 * should track, and reject the fields this program type must not touch.
 */
switch (off) {
5798+
case bpf_ctx_range(struct __sk_buff, data):
5799+
info->reg_type = PTR_TO_PACKET;
5800+
break;
5801+
case bpf_ctx_range(struct __sk_buff, data_end):
5802+
info->reg_type = PTR_TO_PACKET_END;
5803+
break;
5804+
case bpf_ctx_range(struct __sk_buff, flow_keys):
5805+
info->reg_type = PTR_TO_FLOW_KEYS;
5806+
break;
5807+
case bpf_ctx_range(struct __sk_buff, tc_classid):
5808+
case bpf_ctx_range(struct __sk_buff, data_meta):
5809+
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
5810+
return false;
5811+
}
5812+
5813+
/* Size and alignment rules are left to the shared skb context check. */
return bpf_skb_is_valid_access(off, size, type, prog, info);
5814+
}
5815+
57645816
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
57655817
const struct bpf_insn *si,
57665818
struct bpf_insn *insn_buf,
@@ -6055,6 +6107,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
60556107
bpf_target_off(struct sock_common,
60566108
skc_num, 2, target_size));
60576109
break;
6110+
6111+
case offsetof(struct __sk_buff, flow_keys):
6112+
off = si->off;
6113+
off -= offsetof(struct __sk_buff, flow_keys);
6114+
off += offsetof(struct sk_buff, cb);
6115+
off += offsetof(struct qdisc_skb_cb, flow_keys);
6116+
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
6117+
si->src_reg, off);
6118+
break;
60586119
}
60596120

60606121
return insn - insn_buf;
@@ -7018,6 +7079,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
70187079
const struct bpf_prog_ops sk_msg_prog_ops = {
70197080
};
70207081

7082+
/*
 * Verifier callbacks for flow dissector programs: the program-type specific
 * helper lookup and context access filter, combined with the shared
 * bpf_convert_ctx_access() rewriter (which also handles the __sk_buff
 * flow_keys field).
 */
const struct bpf_verifier_ops flow_dissector_verifier_ops = {
7083+
.get_func_proto = flow_dissector_func_proto,
7084+
.is_valid_access = flow_dissector_is_valid_access,
7085+
.convert_ctx_access = bpf_convert_ctx_access,
7086+
};
7087+
7088+
/* Program ops for flow dissector programs; no callbacks are populated. */
const struct bpf_prog_ops flow_dissector_prog_ops = {
7089+
};
7090+
70217091
int sk_detach_filter(struct sock *sk)
70227092
{
70237093
int ret = -ENOENT;

0 commit comments

Comments
 (0)