[ovs-dev] [PATCH v5 3/3] conntrack: limit port clash resolution attempts
wenxu at ucloud.cn
wenxu at ucloud.cn
Sat Sep 18 12:42:44 UTC 2021
From: wenxu <wenxu at ucloud.cn>
In case almost or all available ports are taken, clash resolution can
take a very long time, resulting in pmd hang in conntrack.
This can happen when many to-be-natted hosts connect to same
destination:port (e.g. a proxy) and all connections pass the same SNAT.
Pick a random offset in the acceptable range, then try ever smaller
number of adjacent port numbers, until either the limit is reached or a
useable port was found. This results in at most 248 attempts
(128 + 64 + 32 + 16 + 8, i.e. 4 restarts with new search offset)
instead of 64000+.
And if thenumber of ip address will limit the max attempts and which
will lead the total attempts under 248.
Signed-off-by: wenxu <wenxu at ucloud.cn>
---
lib/conntrack.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 43 insertions(+), 4 deletions(-)
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 52b5211..02bc2b9 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2427,7 +2427,11 @@ nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn,
uint16_t min_sport, max_sport, curr_sport, orig_sport;
bool pat_proto = conn->key.nw_proto == IPPROTO_TCP ||
conn->key.nw_proto == IPPROTO_UDP;
+ unsigned int attempts, max_attempts, min_attempts;
uint16_t min_dport, max_dport, curr_dport;
+ uint16_t range_src, range_dst, range_max;
+ uint32_t range_addr;
+ unsigned int i;
min_addr = nat_info->min_addr;
max_addr = nat_info->max_addr;
@@ -2444,6 +2448,19 @@ nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn,
set_dport_range(nat_info, &conn->key, hash, &curr_dport,
&min_dport, &max_dport);
+ range_src = max_sport - min_sport + 1;
+ range_dst = max_dport - min_dport + 1;
+ range_max = range_src > range_dst ? range_src : range_dst;
+ range_addr = ntohl(max_addr.ipv4) - ntohl(min_addr.ipv4) + 1;
+ max_attempts = 128 / range_addr;
+ if (max_attempts < 1) {
+ max_attempts = 1;
+ }
+ min_attempts = 16 / range_addr;
+ if (min_attempts < 2) {
+ min_attempts = 2;
+ }
+
another_round:
store_addr_to_key(&curr_addr, &nat_conn->rev_key,
nat_info->nat_action);
@@ -2459,17 +2476,39 @@ another_round:
curr_sport = orig_sport;
+ attempts = range_max;
+ if (attempts > max_attempts) {
+ attempts = max_attempts;
+ }
+
+another_port_round:
+ i = 0;
FOR_EACH_PORT_IN_RANGE(curr_dport, min_dport, max_dport) {
nat_conn->rev_key.src.port = htons(curr_dport);
FOR_EACH_PORT_IN_RANGE(curr_sport, min_sport, max_sport) {
- nat_conn->rev_key.dst.port = htons(curr_sport);
- if (!conn_lookup(ct, &nat_conn->rev_key,
- time_msec(), NULL, NULL)) {
- return true;
+ if (i++ < attempts) {
+ nat_conn->rev_key.dst.port = htons(curr_sport);
+ if (!conn_lookup(ct, &nat_conn->rev_key,
+ time_msec(), NULL, NULL)) {
+ return true;
+ }
+ } else {
+ goto next_attempts;
}
}
}
+next_attempts:
+ if (attempts >= range_max || attempts < min_attempts) {
+ goto next_addr;
+ }
+
+ attempts /= 2;
+ curr_dport = min_dport + (random_uint32() % range_dst);
+ curr_sport = min_sport + (random_uint32() % range_src);
+
+ goto another_port_round;
+
/* Check if next IP is in range and respin. Otherwise, notify
* exhaustion to the caller. */
next_addr:
--
1.8.3.1
More information about the dev
mailing list