
Commit 761876c

jasowang authored and davem330 committed
tap: XDP support
This patch tries to implement XDP for tun. The implementation is split
into two parts:

- fast path: small and non-GSO packets. We try to do XDP at the page
  level before build_skb(). For XDP_TX, since creating/destroying
  queues is completely under the control of userspace, it is
  implemented through the generic XDP helper after the skb has been
  built. This could be optimized in the future.

- slow path: big or GSO packets. We do XDP through the generic XDP
  helpers after the skb has been created.

Tests were done through pktgen with small packets.

xdp1 test shows ~41.1% improvement:
Before: ~1.7Mpps
After:  ~2.3Mpps

xdp_redirect to ixgbe shows ~60% improvement:
Before: ~0.8Mpps
After:  ~1.38Mpps

Suggested-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Jason Wang <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 7c49747 commit 761876c
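
As context for the fast path described above (not part of the commit itself): a minimal sketch of the kind of XDP program that can be attached to a tap device once this patch is in place. The file name, section name, and device name below are illustrative assumptions; the program simply passes every packet on to the stack.

/* xdp_pass.c: illustrative sketch, not from this commit.
 * Build: clang -O2 -target bpf -c xdp_pass.c -o xdp_pass.o
 */
#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

SEC("xdp")
int xdp_pass(struct xdp_md *ctx)
{
        return XDP_PASS;        /* hand every packet to the normal receive path */
}

char _license[] SEC("license") = "GPL";

With the new .ndo_xdp hook, such an object could be attached to a tap device the same way as to a physical NIC, e.g. "ip link set dev tap0 xdp obj xdp_pass.o sec xdp" (assuming an XDP-capable iproute2 and a device named tap0); returning XDP_DROP instead would exercise the page-level drop path added to tun_build_skb().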

File tree

1 file changed: +142 −7 lines changed


drivers/net/tun.c

+142-7
@@ -73,6 +73,8 @@
 #include <linux/seq_file.h>
 #include <linux/uio.h>
 #include <linux/skb_array.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 
 #include <linux/uaccess.h>
 
@@ -105,7 +107,8 @@ do { \
 } while (0)
 #endif
 
-#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+#define TUN_HEADROOM 256
+#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM)
 
 /* TUN device flags */
 
@@ -224,6 +227,7 @@ struct tun_struct {
         u32 flow_count;
         u32 rx_batched;
         struct tun_pcpu_stats __percpu *pcpu_stats;
+        struct bpf_prog __rcu *xdp_prog;
 };
 
 #ifdef CONFIG_TUN_VNET_CROSS_LE
@@ -590,6 +594,7 @@ static void tun_detach(struct tun_file *tfile, bool clean)
 static void tun_detach_all(struct net_device *dev)
 {
         struct tun_struct *tun = netdev_priv(dev);
+        struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
         struct tun_file *tfile, *tmp;
         int i, n = tun->numqueues;
 
@@ -622,6 +627,9 @@ static void tun_detach_all(struct net_device *dev)
         }
         BUG_ON(tun->numdisabled != 0);
 
+        if (xdp_prog)
+                bpf_prog_put(xdp_prog);
+
         if (tun->flags & IFF_PERSIST)
                 module_put(THIS_MODULE);
 }
@@ -1008,6 +1016,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
         stats->tx_dropped = tx_dropped;
 }
 
+static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+                       struct netlink_ext_ack *extack)
+{
+        struct tun_struct *tun = netdev_priv(dev);
+        struct bpf_prog *old_prog;
+
+        old_prog = rtnl_dereference(tun->xdp_prog);
+        rcu_assign_pointer(tun->xdp_prog, prog);
+        if (old_prog)
+                bpf_prog_put(old_prog);
+
+        return 0;
+}
+
+static u32 tun_xdp_query(struct net_device *dev)
+{
+        struct tun_struct *tun = netdev_priv(dev);
+        const struct bpf_prog *xdp_prog;
+
+        xdp_prog = rtnl_dereference(tun->xdp_prog);
+        if (xdp_prog)
+                return xdp_prog->aux->id;
+
+        return 0;
+}
+
+static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+        switch (xdp->command) {
+        case XDP_SETUP_PROG:
+                return tun_xdp_set(dev, xdp->prog, xdp->extack);
+        case XDP_QUERY_PROG:
+                xdp->prog_id = tun_xdp_query(dev);
+                xdp->prog_attached = !!xdp->prog_id;
+                return 0;
+        default:
+                return -EINVAL;
+        }
+}
+
 static const struct net_device_ops tun_netdev_ops = {
         .ndo_uninit = tun_net_uninit,
         .ndo_open = tun_net_open,
@@ -1038,6 +1086,7 @@ static const struct net_device_ops tap_netdev_ops = {
         .ndo_features_check = passthru_features_check,
         .ndo_set_rx_headroom = tun_set_headroom,
         .ndo_get_stats64 = tun_net_get_stats64,
+        .ndo_xdp = tun_xdp,
 };
 
 static void tun_flow_init(struct tun_struct *tun)
@@ -1217,16 +1266,22 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
         return true;
 }
 
-static struct sk_buff *tun_build_skb(struct tun_file *tfile,
+static struct sk_buff *tun_build_skb(struct tun_struct *tun,
+                                     struct tun_file *tfile,
                                      struct iov_iter *from,
-                                     int len)
+                                     struct virtio_net_hdr *hdr,
+                                     int len, int *generic_xdp)
 {
         struct page_frag *alloc_frag = &tfile->alloc_frag;
         struct sk_buff *skb;
+        struct bpf_prog *xdp_prog;
         int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
                      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+        unsigned int delta = 0;
         char *buf;
         size_t copied;
+        bool xdp_xmit = false;
+        int err;
 
         if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
                 return ERR_PTR(-ENOMEM);
@@ -1238,16 +1293,77 @@ static struct sk_buff *tun_build_skb(struct tun_file *tfile,
         if (copied != len)
                 return ERR_PTR(-EFAULT);
 
+        if (hdr->gso_type)
+                *generic_xdp = 1;
+        else
+                *generic_xdp = 0;
+
+        rcu_read_lock();
+        xdp_prog = rcu_dereference(tun->xdp_prog);
+        if (xdp_prog && !*generic_xdp) {
+                struct xdp_buff xdp;
+                void *orig_data;
+                u32 act;
+
+                xdp.data_hard_start = buf;
+                xdp.data = buf + TUN_RX_PAD;
+                xdp.data_end = xdp.data + len;
+                orig_data = xdp.data;
+                act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+                switch (act) {
+                case XDP_REDIRECT:
+                        get_page(alloc_frag->page);
+                        alloc_frag->offset += buflen;
+                        err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+                        if (err)
+                                goto err_redirect;
+                        return NULL;
+                case XDP_TX:
+                        xdp_xmit = true;
+                        /* fall through */
+                case XDP_PASS:
+                        delta = orig_data - xdp.data;
+                        break;
+                default:
+                        bpf_warn_invalid_xdp_action(act);
+                        /* fall through */
+                case XDP_ABORTED:
+                        trace_xdp_exception(tun->dev, xdp_prog, act);
+                        /* fall through */
+                case XDP_DROP:
+                        goto err_xdp;
+                }
+        }
+
         skb = build_skb(buf, buflen);
-        if (!skb)
+        if (!skb) {
+                rcu_read_unlock();
                 return ERR_PTR(-ENOMEM);
+        }
 
-        skb_reserve(skb, TUN_RX_PAD);
-        skb_put(skb, len);
+        skb_reserve(skb, TUN_RX_PAD - delta);
+        skb_put(skb, len + delta);
         get_page(alloc_frag->page);
         alloc_frag->offset += buflen;
 
+        if (xdp_xmit) {
+                skb->dev = tun->dev;
+                generic_xdp_tx(skb, xdp_prog);
+                rcu_read_lock();
+                return NULL;
+        }
+
+        rcu_read_unlock();
+
         return skb;
+
+err_redirect:
+        put_page(alloc_frag->page);
+err_xdp:
+        rcu_read_unlock();
+        this_cpu_inc(tun->pcpu_stats->rx_dropped);
+        return NULL;
 }
 
 /* Get packet from user space buffer */
@@ -1266,6 +1382,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
         bool zerocopy = false;
         int err;
         u32 rxhash;
+        int generic_xdp = 1;
 
         if (!(tun->dev->flags & IFF_UP))
                 return -EIO;
@@ -1324,11 +1441,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
         }
 
         if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
-                skb = tun_build_skb(tfile, from, len);
+                skb = tun_build_skb(tun, tfile, from, &gso, len, &generic_xdp);
                 if (IS_ERR(skb)) {
                         this_cpu_inc(tun->pcpu_stats->rx_dropped);
                         return PTR_ERR(skb);
                 }
+                if (!skb)
+                        return total_len;
         } else {
                 if (!zerocopy) {
                         copylen = len;
@@ -1402,6 +1521,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
         skb_reset_network_header(skb);
         skb_probe_transport_header(skb, 0);
 
+        if (generic_xdp) {
+                struct bpf_prog *xdp_prog;
+                int ret;
+
+                rcu_read_lock();
+                xdp_prog = rcu_dereference(tun->xdp_prog);
+                if (xdp_prog) {
+                        ret = do_xdp_generic(xdp_prog, skb);
+                        if (ret != XDP_PASS) {
+                                rcu_read_unlock();
+                                return total_len;
+                        }
+                }
+                rcu_read_unlock();
+        }
+
         rxhash = __skb_get_hash_symmetric(skb);
 #ifndef CONFIG_4KSTACKS
         tun_rx_batched(tun, tfile, skb, more);
