Skip to content

Commit 6578171

Browse files
borkmann authored and davem330 committed
bpf: add bpf_skb_change_proto helper
This patch adds a minimal helper for doing the groundwork of changing the skb->protocol in a controlled way. Currently supported is v4 to v6 and vice versa transitions, which allows f.e. for a minimal, static nat64 implementation where applications in containers that still require IPv4 can be transparently operated in an IPv6-only environment. For example, host facing veth of the container can transparently do the transitions in a programmatic way with the help of clsact qdisc and cls_bpf. Idea is to separate concerns for keeping complexity of the helper lower, which means that the programs utilize bpf_skb_change_proto(), bpf_skb_store_bytes() and bpf_lX_csum_replace() to get the job done, instead of doing everything in a single helper (and thus partially duplicating helper functionality). Also, bpf_skb_change_proto() shouldn't need to deal with raw packet data as this is done by other helpers. bpf_skb_proto_6_to_4() and bpf_skb_proto_4_to_6() unclone the skb to operate on a private one, push or pop additionally required header space and migrate the gso/gro meta data from the shared info. We do mark the gso type as dodgy so that headers are checked and segs recalculated by the gso/gro engine. The gso_size target is adapted as well. The flags argument added is currently reserved and can be used for future extensions. Signed-off-by: Daniel Borkmann <[email protected]> Acked-by: Alexei Starovoitov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 80b48c4 commit 6578171

File tree

2 files changed

+214
-0
lines changed

2 files changed

+214
-0
lines changed

include/uapi/linux/bpf.h

+14
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,20 @@ enum bpf_func_id {
313313
*/
314314
BPF_FUNC_skb_get_tunnel_opt,
315315
BPF_FUNC_skb_set_tunnel_opt,
316+
317+
/**
318+
* bpf_skb_change_proto(skb, proto, flags)
319+
* Change protocol of the skb. Currently supported is
320+
* v4 -> v6, v6 -> v4 transitions. The helper will also
321+
* resize the skb. eBPF program is expected to fill the
322+
* new headers via skb_store_bytes and lX_csum_replace.
323+
* @skb: pointer to skb
324+
* @proto: new skb->protocol type
325+
* @flags: reserved
326+
* Return: 0 on success or negative error
327+
*/
328+
BPF_FUNC_skb_change_proto,
329+
316330
__BPF_FUNC_MAX_ID,
317331
};
318332

net/core/filter.c

+200
Original file line numberDiff line numberDiff line change
@@ -1783,6 +1783,202 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
17831783
};
17841784
EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
17851785

1786+
/* Open up @len bytes of zeroed header room at offset @off from skb->data.
 *
 * The existing first @off bytes (the headers in front of the insertion
 * point) are shifted down into the newly pushed head space, and the gap
 * left behind is cleared to zero for the eBPF program to fill in later
 * via bpf_skb_store_bytes().
 *
 * @skb: socket buffer to grow; must be private (uncloned) to the caller
 * @off: number of bytes from skb->data that precede the insertion point
 * @len: number of header bytes to open up
 * Return: always 0 (the needed headroom is guaranteed by the caller)
 */
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
	/* Caller already did skb_cow() with len as headroom,
	 * so no need to do it here.
	 */
	skb_push(skb, len);
	memmove(skb->data, skb->data + len, off);
	memset(skb->data + off, 0, len);

	/* No skb_postpush_rcsum(skb, skb->data + off, len)
	 * needed here as it does not change the skb->csum
	 * result for checksum complete when summing over
	 * zeroed blocks.
	 */
	return 0;
}
1802+
1803+
/* Remove @len bytes of header at offset @off from skb->data.
 *
 * The bytes being removed are first pulled out of the checksum-complete
 * csum (skb_postpull_rcsum() must see them before they are overwritten),
 * then the leading @off bytes are shifted down over them and the skb
 * data pointer advanced past the freed space.
 *
 * @skb: socket buffer to shrink; must already be uncloned/writable
 * @off: number of bytes from skb->data that precede the removed region
 * @len: number of header bytes to remove
 * Return: 0 on success, -ENOMEM if off + len bytes cannot be pulled
 *         into the linear area
 */
static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
	/* skb_ensure_writable() is not needed here, as we're
	 * already working on an uncloned skb.
	 */
	if (unlikely(!pskb_may_pull(skb, off + len)))
		return -ENOMEM;

	/* Account the soon-to-be-overwritten bytes out of skb->csum
	 * before memmove() clobbers them.
	 */
	skb_postpull_rcsum(skb, skb->data + off, len);
	memmove(skb->data + len, skb->data, off);
	__skb_pull(skb, len);

	return 0;
}
1817+
1818+
/* Grow the network header area by @len bytes and rebase the header
 * offsets (mac, network, and — if it coincided with the network
 * header — transport) accordingly.
 */
static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
{
	/* Record up front whether transport and network header pointed
	 * at the same place, so the relationship can be restored after
	 * the push moved everything.
	 */
	bool th_eq_nh = skb->transport_header == skb->network_header;
	int err;

	/* No __skb_push()/__skb_pull() pair is needed to get to the
	 * start of the mac header: under eBPF we are guaranteed to
	 * already start from there.
	 */
	err = bpf_skb_generic_push(skb, off, len);
	if (unlikely(err))
		return err;

	skb->mac_header -= len;
	skb->network_header -= len;
	if (th_eq_nh)
		skb->transport_header = skb->network_header;

	return 0;
}
1837+
1838+
/* Shrink the network header area by @len bytes and rebase the header
 * offsets (mac, network, and — if it coincided with the network
 * header — transport) accordingly.
 */
static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
{
	/* As in bpf_skb_net_hdr_push(): remember whether transport and
	 * network header coincided before the data moved.
	 */
	bool th_eq_nh = skb->transport_header == skb->network_header;
	int err;

	/* Same here, __skb_push()/__skb_pull() pair not needed. */
	err = bpf_skb_generic_pop(skb, off, len);
	if (unlikely(err))
		return err;

	skb->mac_header += len;
	skb->network_header += len;
	if (th_eq_nh)
		skb->transport_header = skb->network_header;

	return 0;
}
1854+
1855+
/* Convert an IPv4 skb into an IPv6 one: open up the 20 bytes of extra
 * header room the larger IPv6 header needs, migrate GSO meta data, and
 * flip skb->protocol. The eBPF program is expected to write the actual
 * IPv6 header afterwards via bpf_skb_store_bytes() and fix checksums
 * with the bpf_lX_csum_replace() helpers.
 *
 * Return: 0 on success or a negative error from skb_cow()/header push.
 */
static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
{
	/* 40 - 20 = 20 bytes of additional header space needed. */
	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
	/* Offset of the network header relative to the mac header;
	 * everything in front of it gets shifted by the push.
	 */
	u32 off = skb->network_header - skb->mac_header;
	int ret;

	/* Unclone and ensure len_diff bytes of headroom in one go. */
	ret = skb_cow(skb, len_diff);
	if (unlikely(ret < 0))
		return ret;

	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
	if (unlikely(ret < 0))
		return ret;

	if (skb_is_gso(skb)) {
		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
		 * be changed into SKB_GSO_TCPV6.
		 */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
		}

		/* Due to IPv6 header, MSS needs to be downgraded. */
		skb_shinfo(skb)->gso_size -= len_diff;
		/* Header must be checked, and gso_segs recomputed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	skb->protocol = htons(ETH_P_IPV6);
	/* The flow hash was computed over the old headers; drop it so
	 * it gets recalculated on demand.
	 */
	skb_clear_hash(skb);

	return 0;
}
1890+
1891+
/* Convert an IPv6 skb into an IPv4 one: drop the 20 bytes of header
 * space the smaller IPv4 header frees up, migrate GSO meta data, and
 * flip skb->protocol. As with the 4->6 direction, the eBPF program
 * fills in the new IPv4 header itself via bpf_skb_store_bytes() and
 * the checksum helpers.
 *
 * Return: 0 on success or a negative error from unclone/header pop.
 */
static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
{
	/* 40 - 20 = 20 bytes of header space to give back. */
	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
	/* Offset of the network header relative to the mac header. */
	u32 off = skb->network_header - skb->mac_header;
	int ret;

	/* Popping only shrinks the skb, so a private copy suffices;
	 * no extra headroom is required here, unlike the 4->6 case.
	 */
	ret = skb_unclone(skb, GFP_ATOMIC);
	if (unlikely(ret < 0))
		return ret;

	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
	if (unlikely(ret < 0))
		return ret;

	if (skb_is_gso(skb)) {
		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
		 * be changed into SKB_GSO_TCPV4.
		 */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
		}

		/* Due to IPv4 header, MSS can be upgraded. */
		skb_shinfo(skb)->gso_size += len_diff;
		/* Header must be checked, and gso_segs recomputed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	skb->protocol = htons(ETH_P_IP);
	/* Flow hash covered the old headers; force recalculation. */
	skb_clear_hash(skb);

	return 0;
}
1926+
1927+
/* Dispatch a protocol translation request to the matching conversion
 * routine based on the skb's current protocol and the requested one.
 * Only the v4 <-> v6 transitions are implemented; every other
 * combination is rejected with -ENOTSUPP.
 */
static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
{
	const __be16 cur_proto = skb->protocol;

	if (cur_proto == htons(ETH_P_IP) && to_proto == htons(ETH_P_IPV6))
		return bpf_skb_proto_4_to_6(skb);
	if (cur_proto == htons(ETH_P_IPV6) && to_proto == htons(ETH_P_IP))
		return bpf_skb_proto_6_to_4(skb);

	return -ENOTSUPP;
}
1941+
1942+
/* Helper body for BPF_FUNC_skb_change_proto. Raw eBPF calling
 * convention: r1 = skb pointer, r2 = new skb->protocol (network byte
 * order), flags = reserved and must be zero, r4/r5 unused.
 *
 * Return: 0 on success, -EINVAL for non-zero flags, -ENOTSUPP for an
 *         unsupported protocol pair, or a negative error from the
 *         underlying resize.
 */
static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	__be16 proto = (__force __be16) r2;
	int ret;

	/* flags is reserved for future extensions; reject anything
	 * non-zero now so old programs stay valid later.
	 */
	if (unlikely(flags))
		return -EINVAL;

	/* General idea is that this helper does the basic groundwork
	 * needed for changing the protocol, and eBPF program fills the
	 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
	 * and other helpers, rather than passing a raw buffer here.
	 *
	 * The rationale is to keep this minimal and without a need to
	 * deal with raw packet data. F.e. even if we would pass buffers
	 * here, the program still needs to call the bpf_lX_csum_replace()
	 * helpers anyway. Plus, this way we keep also separation of
	 * concerns, since f.e. bpf_skb_store_bytes() should only take
	 * care of stores.
	 *
	 * Currently, additional options and extension header space are
	 * not supported, but flags register is reserved so we can adapt
	 * that. For offloads, we mark packet as dodgy, so that headers
	 * need to be verified first.
	 */
	ret = bpf_skb_proto_xlat(skb, proto);
	/* The translation moved skb->data; refresh the cached data_end
	 * so subsequent direct packet accesses see the new bounds.
	 * Done unconditionally since a failed xlat may still have
	 * touched the skb geometry — NOTE(review): confirm against
	 * bpf_skb_proto_xlat() failure paths.
	 */
	bpf_compute_data_end(skb);
	return ret;
}
1972+
1973+
/* Verifier-facing prototype for bpf_skb_change_proto(): ctx (skb)
 * plus two scalar arguments (proto, flags), integer return, callable
 * from non-GPL programs.
 */
static const struct bpf_func_proto bpf_skb_change_proto_proto = {
	.func		= bpf_skb_change_proto,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
1981+
17861982
bool bpf_helper_changes_skb_data(void *func)
17871983
{
17881984
if (func == bpf_skb_vlan_push)
@@ -1791,6 +1987,8 @@ bool bpf_helper_changes_skb_data(void *func)
17911987
return true;
17921988
if (func == bpf_skb_store_bytes)
17931989
return true;
1990+
if (func == bpf_skb_change_proto)
1991+
return true;
17941992
if (func == bpf_l3_csum_replace)
17951993
return true;
17961994
if (func == bpf_l4_csum_replace)
@@ -2078,6 +2276,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
20782276
return &bpf_skb_vlan_push_proto;
20792277
case BPF_FUNC_skb_vlan_pop:
20802278
return &bpf_skb_vlan_pop_proto;
2279+
case BPF_FUNC_skb_change_proto:
2280+
return &bpf_skb_change_proto_proto;
20812281
case BPF_FUNC_skb_get_tunnel_key:
20822282
return &bpf_skb_get_tunnel_key_proto;
20832283
case BPF_FUNC_skb_set_tunnel_key:

0 commit comments

Comments
 (0)