Skip to content

Commit ba37b4d

Browse files
AkhileshSamineni authored and yxieca committed
Port Fullcone NAT changes from the 5.10 kernel to the 6.1 kernel. (#357)
* Fullcone NAT changes are ported from 5.10 to 6.1 kernel. Signed-off-by: Akhilesh Samineni <[email protected]>
* Fixed compilation issues. Signed-off-by: Akhilesh Samineni <[email protected]>
---------
Signed-off-by: Akhilesh Samineni <[email protected]>
1 parent b899479 commit ba37b4d

File tree

2 files changed

+84
-64
lines changed

2 files changed

+84
-64
lines changed

patch/Support-for-fullcone-nat.patch

+83-63
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1-
From 660e63c0bbae1a7f58dadf04c1b7a9eef7621227 Mon Sep 17 00:00:00 2001
2-
From: Kiran Kella <kiran.kella@broadcom.com>
3-
Date: Tue, 5 Oct 2021 23:26:02 -0700
4-
Subject: [PATCH] netfilter: nf_nat: Support fullcone NAT
1+
From d1dd893ddae49ca4dc55073449c37d5b97504c05 Mon Sep 17 00:00:00 2001
2+
From: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
3+
Date: Mon, 6 Nov 2023 11:55:58 -0800
4+
Subject: [PATCH] Support fullcone NAT
55

66
Changes done in the kernel to ensure 3-tuple uniqueness of the conntrack
77
entries for the fullcone nat functionality.
@@ -27,43 +27,42 @@ The kernel changes mentioned above are done to counter the challenges
2727
explained in the section *3.4.2.1 Handling NAT model mismatch between
2828
the ASIC and the Kernel* in the NAT HLD [1].
2929

30-
[1]: https://github.com/kirankella/SONiC/blob/nat_doc_changes/doc/nat/nat_design_spec.md
30+
[1]: https://github.com/sonic-net/SONiC/blob/master/doc/nat/nat_design_spec.md
3131

32-
Signed-off-by: Kiran Kella <kiran.kella@broadcom.com>
32+
Signed-off-by: Akhilesh Samineni <akhilesh.samineni@broadcom.com>
3333
---
34-
include/net/netfilter/nf_conntrack.h | 3 +
35-
include/uapi/linux/netfilter/nf_nat.h | 4 +-
36-
net/netfilter/nf_nat_core.c | 204 ++++++++++++++++++++++----
37-
3 files changed, 180 insertions(+), 31 deletions(-)
34+
include/net/netfilter/nf_conntrack.h | 3 +
35+
include/uapi/linux/netfilter/nf_nat.h | 3 +-
36+
net/netfilter/nf_nat_core.c | 222 +++++++++++++++---
37+
3 files changed, 197 insertions(+), 31 deletions(-)
3838

3939
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
40-
index 439379ca9..c4c05b7b0 100644
40+
index 6a2019aaa..191d6367c 100644
4141
--- a/include/net/netfilter/nf_conntrack.h
4242
+++ b/include/net/netfilter/nf_conntrack.h
43-
@@ -85,6 +85,9 @@ struct nf_conn {
43+
@@ -103,6 +103,9 @@ struct nf_conn {
4444

4545
#if IS_ENABLED(CONFIG_NF_NAT)
4646
struct hlist_node nat_bysource;
4747
+
48-
+ /* To optionally ensure 3-tuple uniqueness on the translated source */
49-
+ struct hlist_node nat_by_manip_src;
48+
+ /* To optionally ensure 3-tuple uniqueness on the translated source */
49+
+ struct hlist_node nat_by_manip_src;
5050
#endif
5151
/* all members below initialized via memset */
5252
struct { } __nfct_init_offset;
5353
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
54-
index a64586e77..9b3f48a7d 100644
54+
index a64586e77..d60f5a9c2 100644
5555
--- a/include/uapi/linux/netfilter/nf_nat.h
5656
+++ b/include/uapi/linux/netfilter/nf_nat.h
57-
@@ -13,6 +13,8 @@
57+
@@ -12,6 +12,7 @@
58+
#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
5859
#define NF_NAT_RANGE_PROTO_OFFSET (1 << 5)
5960
#define NF_NAT_RANGE_NETMAP (1 << 6)
61+
+#define NF_NAT_RANGE_FULLCONE (1 << 10)
6062

61-
+#define NF_NAT_RANGE_FULLCONE (1 << 10)
62-
+
6363
#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
6464
(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
65-
66-
@@ -20,7 +22,7 @@
65+
@@ -20,7 +21,7 @@
6766
(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \
6867
NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \
6968
NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
@@ -73,7 +72,7 @@ index a64586e77..9b3f48a7d 100644
7372
struct nf_nat_ipv4_range {
7473
unsigned int flags;
7574
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
76-
index b7c3c9022..16cac0253 100644
75+
index e29e4ccb5..678b50967 100644
7776
--- a/net/netfilter/nf_nat_core.c
7877
+++ b/net/netfilter/nf_nat_core.c
7978
@@ -33,6 +33,7 @@ static DEFINE_MUTEX(nf_nat_proto_mutex);
@@ -82,41 +81,60 @@ index b7c3c9022..16cac0253 100644
8281
static struct hlist_head *nf_nat_bysource __read_mostly;
8382
+static struct hlist_head *nf_nat_by_manip_src __read_mostly;
8483
static unsigned int nf_nat_htable_size __read_mostly;
85-
static unsigned int nf_nat_hash_rnd __read_mostly;
84+
static siphash_aligned_key_t nf_nat_hash_rnd;
8685

87-
@@ -200,6 +201,31 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
86+
@@ -180,6 +181,50 @@ hash_by_src(const struct net *net,
8887
return reciprocal_scale(hash, nf_nat_htable_size);
8988
}
9089

9190
+static inline unsigned int
92-
+hash_by_dst(const struct net *n, const struct nf_conntrack_tuple *tuple)
91+
+hash_by_dst(const struct net *net,
92+
+ const struct nf_conntrack_zone *zone,
93+
+ const struct nf_conntrack_tuple *tuple)
9394
+{
94-
+ unsigned int hash;
95+
+ unsigned int hash;
96+
+ struct {
97+
+ union nf_inet_addr dst_addr;
98+
+ u32 net_mix;
99+
+ u16 dport;
100+
+ u32 protonum;
101+
+ u32 zone;
102+
+ } __aligned(SIPHASH_ALIGNMENT) combined;
103+
+
104+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
105+
+
106+
+ memset(&combined, 0, sizeof(combined));
107+
+
108+
+ combined.dst_addr = tuple->dst.u3;
109+
+ combined.net_mix = net_hash_mix(net);
110+
+ combined.protonum = tuple->dst.protonum;
111+
+ combined.dport = (__force __u16)tuple->dst.u.all;
95112
+
96-
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
113+
+ /* Zone ID can be used provided its valid for both directions */
114+
+ if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
115+
+ combined.zone = zone->id;
97116
+
98-
+ hash = jhash2((u32 *)&tuple->dst, sizeof(tuple->dst) / sizeof(u32),
99-
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
117+
+ hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
100118
+
101-
+ return reciprocal_scale(hash, nf_nat_htable_size);
119+
+ return reciprocal_scale(hash, nf_nat_htable_size);
102120
+}
103121
+
104122
+static inline int
105123
+same_reply_dst(const struct nf_conn *ct,
106124
+ const struct nf_conntrack_tuple *tuple)
107125
+{
108-
+ const struct nf_conntrack_tuple *t;
126+
+ const struct nf_conntrack_tuple *t;
109127
+
110-
+ t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
111-
+ return (t->dst.protonum == tuple->dst.protonum &&
112-
+ nf_inet_addr_cmp(&t->dst.u3, &tuple->dst.u3) &&
113-
+ t->dst.u.all == tuple->dst.u.all);
128+
+ t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
129+
+ return (t->dst.protonum == tuple->dst.protonum &&
130+
+ nf_inet_addr_cmp(&t->dst.u3, &tuple->dst.u3) &&
131+
+ t->dst.u.all == tuple->dst.u.all);
114132
+}
115133
+
116134
/* Is this tuple already taken? (not by us) */
117135
static int
118136
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
119-
@@ -217,6 +243,38 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
137+
@@ -197,6 +242,38 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
120138
return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
121139
}
122140

@@ -138,7 +156,7 @@ index b7c3c9022..16cac0253 100644
138156
+ zone = nf_ct_zone(ignored_conntrack);
139157
+
140158
+ /* The tuple passed here is the inverted reply (with translated source) */
141-
+ h = hash_by_src(net, tuple);
159+
+ h = hash_by_src(net, zone, tuple);
142160
+ hlist_for_each_entry_rcu(ct, &nf_nat_by_manip_src[h], nat_by_manip_src) {
143161
+ struct nf_conntrack_tuple reply;
144162
+ nf_ct_invert_tuple(&reply, tuple);
@@ -155,7 +173,7 @@ index b7c3c9022..16cac0253 100644
155173
static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
156174
const struct nf_nat_range2 *range)
157175
{
158-
@@ -318,6 +376,34 @@ find_appropriate_src(struct net *net,
176+
@@ -298,6 +375,33 @@ find_appropriate_src(struct net *net,
159177
return 0;
160178
}
161179

@@ -171,7 +189,7 @@ index b7c3c9022..16cac0253 100644
171189
+ const struct nf_conn *ct;
172190
+
173191
+ nf_ct_invert_tuple(&reply, tuple);
174-
+ h = hash_by_src(net, &reply);
192+
+ h = hash_by_src(net, zone, &reply);
175193
+
176194
+ hlist_for_each_entry_rcu(ct, &nf_nat_by_manip_src[h], nat_by_manip_src) {
177195
+ if (same_reply_dst(ct, tuple) &&
@@ -186,11 +204,10 @@ index b7c3c9022..16cac0253 100644
186204
+ }
187205
+ return 0;
188206
+}
189-
+
190207
/* For [FUTURE] fragmentation handling, we want the least-used
191208
* src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
192209
* if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
193-
@@ -397,10 +483,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
210+
@@ -377,10 +481,10 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
194211
*
195212
* Per-protocol part of tuple is initialized to the incoming packet.
196213
*/
@@ -205,7 +222,7 @@ index b7c3c9022..16cac0253 100644
205222
{
206223
unsigned int range_size, min, max, i, attempts;
207224
__be16 *keyptr;
208-
@@ -426,7 +512,7 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
225+
@@ -406,7 +510,7 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
209226
/* If there is no master conntrack we are not PPTP,
210227
do not change tuples */
211228
if (!ct->master)
@@ -214,7 +231,7 @@ index b7c3c9022..16cac0253 100644
214231

215232
if (maniptype == NF_NAT_MANIP_SRC)
216233
keyptr = &tuple->src.u.gre.key;
217-
@@ -454,14 +540,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
234+
@@ -434,14 +538,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
218235

219236
break;
220237
default:
@@ -231,7 +248,7 @@ index b7c3c9022..16cac0253 100644
231248

232249
if (ntohs(*keyptr) < 1024) {
233250
/* Loose convention: >> 512 is credential passing */
234-
@@ -503,12 +589,18 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
251+
@@ -483,12 +587,18 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
235252
another_round:
236253
for (i = 0; i < attempts; i++, off++) {
237254
*keyptr = htons(min + off % range_size);
@@ -251,9 +268,9 @@ index b7c3c9022..16cac0253 100644
251268
- return;
252269
+ return 0;
253270
attempts /= 2;
254-
off = prandom_u32();
271+
off = get_random_u16();
255272
goto another_round;
256-
@@ -517,10 +609,15 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
273+
@@ -497,10 +607,15 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
257274
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
258275
* we change the source to map into the range. For NF_INET_PRE_ROUTING
259276
* and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -272,7 +289,7 @@ index b7c3c9022..16cac0253 100644
272289
get_unique_tuple(struct nf_conntrack_tuple *tuple,
273290
const struct nf_conntrack_tuple *orig_tuple,
274291
const struct nf_nat_range2 *range,
275-
@@ -528,8 +625,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
292+
@@ -508,8 +623,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
276293
enum nf_nat_manip_type maniptype)
277294
{
278295
const struct nf_conntrack_zone *zone;
@@ -284,12 +301,12 @@ index b7c3c9022..16cac0253 100644
284301
zone = nf_ct_zone(ct);
285302

286303
/* 1) If this srcip/proto/src-proto-part is currently mapped,
287-
@@ -541,46 +641,76 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
304+
@@ -521,46 +639,76 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
288305
* manips not an issue.
289306
*/
290307
if (maniptype == NF_NAT_MANIP_SRC &&
291308
- !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
292-
+ !(nat_range.flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
309+
+ !(nat_range.flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
293310
/* try the original tuple first */
294311
- if (in_range(orig_tuple, range)) {
295312
+ if (in_range(orig_tuple, &nat_range)) {
@@ -377,7 +394,7 @@ index b7c3c9022..16cac0253 100644
377394
}
378395

379396
struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
380-
@@ -622,7 +752,9 @@ nf_nat_setup_info(struct nf_conn *ct,
397+
@@ -602,7 +750,9 @@ nf_nat_setup_info(struct nf_conn *ct,
381398
nf_ct_invert_tuple(&curr_tuple,
382399
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
383400

@@ -388,15 +405,15 @@ index b7c3c9022..16cac0253 100644
388405

389406
if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
390407
struct nf_conntrack_tuple reply;
391-
@@ -644,12 +776,16 @@ nf_nat_setup_info(struct nf_conn *ct,
408+
@@ -624,12 +774,16 @@ nf_nat_setup_info(struct nf_conn *ct,
392409

393410
if (maniptype == NF_NAT_MANIP_SRC) {
394411
unsigned int srchash;
395412
+ unsigned int manip_src_hash;
396413
spinlock_t *lock;
397414

398-
+ manip_src_hash = hash_by_src(net, &new_tuple);
399-
srchash = hash_by_src(net,
415+
+ manip_src_hash = hash_by_src(net, nf_ct_zone(ct), &new_tuple);
416+
srchash = hash_by_src(net, nf_ct_zone(ct),
400417
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
401418
lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
402419
spin_lock_bh(lock);
@@ -405,38 +422,41 @@ index b7c3c9022..16cac0253 100644
405422
hlist_add_head_rcu(&ct->nat_bysource,
406423
&nf_nat_bysource[srchash]);
407424
spin_unlock_bh(lock);
408-
@@ -818,6 +954,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
409-
h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
425+
@@ -808,6 +962,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
426+
h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
410427
spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
411428
hlist_del_rcu(&ct->nat_bysource);
412429
+ hlist_del_rcu(&ct->nat_by_manip_src);
413430
spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
414431
}
415432

416-
@@ -1161,9 +1298,14 @@ static int __init nf_nat_init(void)
433+
@@ -1138,12 +1293,17 @@ static int __init nf_nat_init(void)
417434
if (!nf_nat_bysource)
418435
return -ENOMEM;
419436

420437
+ nf_nat_by_manip_src = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
421438
+ if (!nf_nat_by_manip_src)
422439
+ return -ENOMEM;
423440
+
424-
ret = nf_ct_extend_register(&nat_extend);
441+
for (i = 0; i < CONNTRACK_LOCKS; i++)
442+
spin_lock_init(&nf_nat_locks[i]);
443+
444+
ret = register_pernet_subsys(&nat_net_ops);
425445
if (ret < 0) {
426446
kvfree(nf_nat_bysource);
427447
+ kvfree(nf_nat_by_manip_src);
428-
pr_err("Unable to register extension\n");
429448
return ret;
430449
}
431-
@@ -1175,6 +1317,7 @@ static int __init nf_nat_init(void)
432-
if (ret < 0) {
433-
nf_ct_extend_unregister(&nat_extend);
450+
451+
@@ -1159,6 +1319,7 @@ static int __init nf_nat_init(void)
452+
synchronize_net();
453+
unregister_pernet_subsys(&nat_net_ops);
434454
kvfree(nf_nat_bysource);
435455
+ kvfree(nf_nat_by_manip_src);
436-
return ret;
437456
}
438457

439-
@@ -1198,6 +1341,7 @@ static void __exit nf_nat_cleanup(void)
458+
return ret;
459+
@@ -1175,6 +1336,7 @@ static void __exit nf_nat_cleanup(void)
440460

441461
synchronize_net();
442462
kvfree(nf_nat_bysource);
@@ -445,5 +465,5 @@ index b7c3c9022..16cac0253 100644
445465
}
446466

447467
--
448-
2.27.0
468+
2.18.0
449469

patch/series

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ driver-net-tg3-add-param-short-preamble-and-reset.patch
3636
0004-dt-bindings-hwmon-Add-missing-documentation-for-lm75.patch
3737
0005-dt-bindings-hwmon-Add-tmp75b-to-lm75.txt.patch
3838
0006-device-tree-bindinds-add-NXP-PCT2075-as-compatible-d.patch
39-
#Support-for-fullcone-nat.patch # TODO: update for current version
39+
Support-for-fullcone-nat.patch
4040
#driver-ixgbe-external-phy.patch # Upstreamed
4141
#kernel-compat-always-include-linux-compat.h-from-net-compat.patch # Upstreamed
4242
#net-sch_generic-fix-the-missing-new-qdisc-assignment.patch # Functionality is present

0 commit comments

Comments
 (0)