[ovs-dev] [PATCH ovn v3 5/5] ovn-controller: Handle DNAT/no-NAT conntrack tuple collisions.

Dumitru Ceara dceara at redhat.com
Fri Jun 11 10:11:31 UTC 2021


Assuming a load balancer, LB1, with:
- VIP: 42.42.42.42:4242
- backend: 42.42.42.1:2121

A client might connect to the backend either directly or through the
VIP.  If the first connection is via the VIP and the second connection
is direct but happens to use the same source L4 port, OVN should make
sure that the second connection is SNATed (source port translation) in
order to avoid a tuple collision at commit time.

For example:
1. Session via the VIP:
   - original packet: src=42.42.42.2:2000, dst=42.42.42.42:4242
   - after DNAT:      src=42.42.42.2:2000, dst=42.42.42.1:2121
2. Session directly connected to the backend:
   - original packet: src=42.42.42.2:2000, dst=42.42.42.1:2121
   - in the ACL stage, committing this connection would fail: after
     DNAT, session 1 already occupies the tuple src=42.42.42.2:2000,
     dst=42.42.42.1:2121 in the same conntrack zone.

In order to avoid this we now use the OVS all-zero-IP SNAT feature
(ct_zero_snat) when committing connections in the ACL stage.  This
translates to a no-op SNAT when there is no tuple collision, and to
source port translation when a collision would otherwise happen.
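
Concretely, the ct commit action encoded for the ACL stage gains a bare
source-NAT specifier.  A sketch of the resulting OpenFlow action (the
zone register and exec() body here are illustrative, mirroring the
ovn.at test update below):

  ct(commit,zone=NXM_NX_REG11[0..15],nat(src),exec(...))

With no address or port range given, nat(src) keeps the original source
address and port unless the commit would collide with an existing
entry, in which case the datapath picks a new source port.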

We program flows to perform all-zero-IP SNAT conditionally, only if the
datapath in use supports it.
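
For reference, a condensed sketch of the gate added to both
encode_CT_COMMIT_V1() and encode_CT_COMMIT_V2() below (omitting the
ofpbuf offset bookkeeping; OVS_CT_ZERO_SNAT_SUPPORT is the feature bit
declared in ovn/features.h):

  if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) {
      /* ovs-vswitchd logged "Datapath supports ct_zero_snat": emit an
       * all-zero SNAT, i.e. nat(src) with no address/port range. */
      struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
      nat->flags = NX_NAT_F_SRC;
      nat->range_af = AF_UNSPEC;
  }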

Reported-at: https://bugzilla.redhat.com/1939676
Signed-off-by: Dumitru Ceara <dceara at redhat.com>
---
 include/ovn/actions.h         |    1 +
 lib/actions.c                 |   31 +++++++
 tests/ovn.at                  |    2 +-
 tests/system-common-macros.at |    4 +
 tests/system-ovn.at           |  190 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 227 insertions(+), 1 deletion(-)

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 040213177..f5eb01eb7 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -25,6 +25,7 @@
 #include "openvswitch/hmap.h"
 #include "openvswitch/uuid.h"
 #include "util.h"
+#include "ovn/features.h"
 
 struct expr;
 struct lexer;
diff --git a/lib/actions.c b/lib/actions.c
index b3433f49e..7010fab2b 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -742,6 +742,22 @@ encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc,
     ct->zone_src.ofs = 0;
     ct->zone_src.n_bits = 16;
 
+    /* If the datapath supports all-zero SNAT then use it to avoid tuple
+     * collisions at commit time between NATed and firewalled-only sessions.
+     */
+
+    if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) {
+        size_t nat_offset = ofpacts->size;
+        ofpbuf_pull(ofpacts, nat_offset);
+
+        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
+        nat->flags = 0;
+        nat->range_af = AF_UNSPEC;
+        nat->flags |= NX_NAT_F_SRC;
+        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
+        ct = ofpacts->header;
+    }
+
     size_t set_field_offset = ofpacts->size;
     ofpbuf_pull(ofpacts, set_field_offset);
 
@@ -792,6 +808,21 @@ encode_CT_COMMIT_V2(const struct ovnact_nest *on,
     ct->zone_src.ofs = 0;
     ct->zone_src.n_bits = 16;
 
+    /* If the datapath supports all-zero SNAT then use it to avoid tuple
+     * collisions at commit time between NATed and firewalled-only sessions.
+     */
+    if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) {
+        size_t nat_offset = ofpacts->size;
+        ofpbuf_pull(ofpacts, nat_offset);
+
+        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
+        nat->flags = 0;
+        nat->range_af = AF_UNSPEC;
+        nat->flags |= NX_NAT_F_SRC;
+        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
+        ct = ofpacts->header;
+    }
+
     size_t set_field_offset = ofpacts->size;
     ofpbuf_pull(ofpacts, set_field_offset);
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 11a85c457..580dea825 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -23109,7 +23109,7 @@ AT_CHECK([
     for hv in 1 2; do
         grep table=15 hv${hv}flows | \
         grep "priority=100" | \
-        grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
+        grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
 
         grep table=22 hv${hv}flows | \
         grep "priority=200" | \
diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
index c8fa6f03f..b742a2cb9 100644
--- a/tests/system-common-macros.at
+++ b/tests/system-common-macros.at
@@ -330,3 +330,7 @@ m4_define([OVS_CHECK_IPROUTE_ENCAP],
 # OVS_CHECK_CT_CLEAR()
 m4_define([OVS_CHECK_CT_CLEAR],
     [AT_SKIP_IF([! grep -q "Datapath supports ct_clear action" ovs-vswitchd.log])])
+
+# OVS_CHECK_CT_ZERO_SNAT()
+m4_define([OVS_CHECK_CT_ZERO_SNAT],
+    [AT_SKIP_IF([! grep -q "Datapath supports ct_zero_snat" ovs-vswitchd.log])])
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 0bebd9fb6..1b8bb3803 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -5296,6 +5296,196 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv4])
+AT_SKIP_IF([test $HAVE_NC = no])
+AT_KEYWORDS([ovnlb])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+OVS_CHECK_CT_ZERO_SNAT()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# 1 logical switch connected to one logical router.
+# 2 VMs, one used as backend for a load balancer.
+
+check ovn-nbctl                                                  \
+    -- lr-add rtr                                                \
+    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.1/24        \
+    -- ls-add ls                                                 \
+    -- lsp-add ls ls-rtr                                         \
+    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00                \
+    -- lsp-set-type ls-rtr router                                \
+    -- lsp-set-options ls-rtr router-port=rtr-ls                 \
+    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
+    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
+    -- lb-add lb-test 66.66.66.66:666 42.42.42.2:4242 tcp        \
+    -- ls-lb-add ls lb-test
+
+ADD_NAMESPACES(vm1)
+ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", "42.42.42.1")
+
+ADD_NAMESPACES(vm2)
+ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", "42.42.42.1")
+
+# Wait for ovn-controller to catch up.
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+# Start IPv4 TCP server on vm1.
+NETNS_DAEMONIZE([vm1], [nc -k -l 42.42.42.2 4242], [nc-vm1.pid])
+
+# Make sure connecting to the VIP works.
+NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z])
+
+# Start IPv4 TCP connection to VIP from vm2.
+NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2001 -z])
+
+# Check conntrack.  We expect two entries:
+# - one in vm1's zone (firewall)
+# - one in vm2's zone (dnat)
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 |             \
+grep "orig=.src=42\.42\.42\.3" |                                    \
+sed -e 's/port=2001/port=<clnt_s_port>/g'                           \
+    -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \
+    -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g'                     \
+    -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl
+tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=42.42.42.3,dst=66.66.66.66,sport=<clnt_s_port>,dport=666),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,labels=0x2,protoinfo=(state=<cleared>)
+])
+
+# Start IPv4 TCP connection to backend IP from vm2 which would require
+# additional source port translation to avoid a tuple conflict.
+NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z])
+
+# Check conntrack.  We expect three entries:
+# - one in vm1's zone (firewall) - reused from the previous connection.
+# - one in vm2's zone (dnat) - still in TIME_WAIT after the previous connection.
+# - one in vm2's zone (firewall + additional all-zero SNAT)
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 |             \
+grep "orig=.src=42\.42\.42\.3" |                                    \
+sed -e 's/port=2001/port=<clnt_s_port>/g'                           \
+    -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \
+    -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g'                     \
+    -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl
+tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<rnd_port>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=42.42.42.3,dst=66.66.66.66,sport=<clnt_s_port>,dport=666),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,labels=0x2,protoinfo=(state=<cleared>)
+])
+
+AT_CLEANUP
+])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv6])
+AT_SKIP_IF([test $HAVE_NC = no])
+AT_KEYWORDS([ovnlb])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+OVS_CHECK_CT_ZERO_SNAT()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# 1 logical switch connected to one logical router.
+# 2 VMs, one used as backend for a load balancer.
+
+check ovn-nbctl                                                  \
+    -- lr-add rtr                                                \
+    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 4242::1/64           \
+    -- ls-add ls                                                 \
+    -- lsp-add ls ls-rtr                                         \
+    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00                \
+    -- lsp-set-type ls-rtr router                                \
+    -- lsp-set-options ls-rtr router-port=rtr-ls                 \
+    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
+    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
+    -- lb-add lb-test [[6666::1]]:666 [[4242::2]]:4242 tcp       \
+    -- ls-lb-add ls lb-test
+
+ADD_NAMESPACES(vm1)
+ADD_VETH(vm1, vm1, br-int, "4242::2/64", "00:00:00:00:00:01", "4242::1")
+OVS_WAIT_UNTIL([test "$(ip netns exec vm1 ip a | grep 4242::2 | grep tentative)" = ""])
+
+ADD_NAMESPACES(vm2)
+ADD_VETH(vm2, vm2, br-int, "4242::3/64", "00:00:00:00:00:02", "4242::1")
+OVS_WAIT_UNTIL([test "$(ip netns exec vm2 ip a | grep 4242::3 | grep tentative)" = ""])
+
+# Wait for ovn-controller to catch up.
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+# Start IPv6 TCP server on vm1.
+NETNS_DAEMONIZE([vm1], [nc -k -l 4242::2 4242], [nc-vm1.pid])
+
+# Make sure connecting to the VIP works.
+NS_CHECK_EXEC([vm2], [nc 6666::1 666 -p 2000 -z])
+
+# Start IPv6 TCP connection to VIP from vm2.
+NS_CHECK_EXEC([vm2], [nc 6666::1 666 -p 2001 -z])
+
+# Check conntrack.  We expect two entries:
+# - one in vm1's zone (firewall)
+# - one in vm2's zone (dnat)
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 |             \
+grep "orig=.src=4242::3" |                                         \
+sed -e 's/port=2001/port=<clnt_s_port>/g'                           \
+    -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \
+    -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g'                     \
+    -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl
+tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=4242::3,dst=6666::1,sport=<clnt_s_port>,dport=666),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,labels=0x2,protoinfo=(state=<cleared>)
+])
+
+# Start IPv6 TCP connection to backend IP from vm2 which would require
+# additional source port translation to avoid a tuple conflict.
+NS_CHECK_EXEC([vm2], [nc 4242::2 4242 -p 2001 -z])
+
+# Check conntrack.  We expect three entries:
+# - one in vm1's zone (firewall) - reused from the previous connection.
+# - one in vm2's zone (dnat) - still in TIME_WAIT after the previous connection.
+# - one in vm2's zone (firewall + additional all-zero SNAT)
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 |             \
+grep "orig=.src=4242::3" |                                          \
+sed -e 's/port=2001/port=<clnt_s_port>/g'                           \
+    -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \
+    -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g'                     \
+    -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl
+tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<rnd_port>),zone=<cleared>,protoinfo=(state=<cleared>)
+tcp,orig=(src=4242::3,dst=6666::1,sport=<clnt_s_port>,dport=666),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,labels=0x2,protoinfo=(state=<cleared>)
+])
+
+AT_CLEANUP
+])
+
 # When a lport is released on a chassis, ovn-controller was
 # not clearing some of the flowss in the table 33 leading
 # to packet drops if ct() is hit.


