[ovs-dev] [RFC ovn 2/2] ovn-controller: Handle DNAT/no-NAT conntrack tuple collisions.

Dumitru Ceara dceara at redhat.com
Wed May 26 13:37:27 UTC 2021


Assuming a load balancer, LB1, with:
- VIP: 42.42.42.42:4242
- backend: 42.42.42.1:2121

A client might connect to the backend either directly or through the
VIP.  If the first connection is via the VIP and the second connection
is direct but happens to use the same source L4 port, OVN should make
sure that the second connection is SNATed (source port translation) in
order to avoid a tuple collision at commit time.

For example:
1. Session via the VIP:
   - original packet: src=42.42.42.2:2000, dst=42.42.42.42:4242
   - after DNAT:      src=42.42.42.2:2000, dst=42.42.42.1:2121
2. Session directly connected to the backend:
   - original packet: src=42.42.42.2:2000, dst=42.42.42.1:2121
   - in acl stage committing this connection would fail.

In order to avoid this we now use the all-zero-ip NAT OVS feature when
committing conneections in the ACL stage.  This translates to a no-op
SNAT when there's no tuple collision, and performs source port
translation when a tuple collision would happen.

We program flows to perform all-zero-ip NAT conditionally, only if the
datapath being used supports it.

Reported-at: https://bugzilla.redhat.com/1939676
Signed-off-by: Dumitru Ceara <dceara at redhat.com>
---
 controller/lflow.c            |    1 +
 include/ovn/actions.h         |    4 ++
 lib/actions.c                 |   18 ++++++++++-
 tests/ovn.at                  |    2 +
 tests/system-common-macros.at |    4 ++
 tests/system-ovn.at           |   70 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index 680b8cca1..af6a237d1 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -582,6 +582,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow,
         .is_switch = datapath_is_switch(dp),
         .group_table = l_ctx_out->group_table,
         .meter_table = l_ctx_out->meter_table,
+        .dp_features = ovs_feature_support_get(),
         .lflow_uuid = lflow->header_.uuid,
 
         .pipeline = ingress ? OVNACT_P_INGRESS : OVNACT_P_EGRESS,
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 040213177..55e0bcf47 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -25,6 +25,7 @@
 #include "openvswitch/hmap.h"
 #include "openvswitch/uuid.h"
 #include "util.h"
+#include "ovn/features.h"
 
 struct expr;
 struct lexer;
@@ -756,6 +757,9 @@ struct ovnact_encode_params {
     /* A struct to figure out the meter_id for meter actions. */
     struct ovn_extend_table *meter_table;
 
+    /* OVS datapath supported features. */
+    const struct ovs_feature_support *dp_features;
+
     /* The logical flow uuid that drove this action. */
     struct uuid lflow_uuid;
 
diff --git a/lib/actions.c b/lib/actions.c
index b3433f49e..8b1597bb0 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -780,7 +780,7 @@ format_CT_COMMIT_V2(const struct ovnact_nest *on, struct ds *s)
 
 static void
 encode_CT_COMMIT_V2(const struct ovnact_nest *on,
-                    const struct ovnact_encode_params *ep OVS_UNUSED,
+                    const struct ovnact_encode_params *ep,
                     struct ofpbuf *ofpacts)
 {
     struct ofpact_conntrack *ct = ofpact_put_CT(ofpacts);
@@ -791,6 +791,22 @@ encode_CT_COMMIT_V2(const struct ovnact_nest *on,
         : mf_from_id(MFF_LOG_DNAT_ZONE);
     ct->zone_src.ofs = 0;
     ct->zone_src.n_bits = 16;
+    ct->alg = 0;
+
+    /* If the datapath supports all-zero NAT then use it to avoid typle
+     * collisions at commit time between NATed and firewalled-only sessions.
+     */
+    if (ep->dp_features && ep->dp_features->ct_all_zero_nat) {
+        size_t nat_offset = ofpacts->size;
+        ofpbuf_pull(ofpacts, nat_offset);
+
+        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
+        nat->flags = 0;
+        nat->range_af = AF_UNSPEC;
+        nat->flags |= NX_NAT_F_SRC;
+        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
+        ct = ofpacts->header;
+    }
 
     size_t set_field_offset = ofpacts->size;
     ofpbuf_pull(ofpacts, set_field_offset);
diff --git a/tests/ovn.at b/tests/ovn.at
index 71d2bab4d..cf7d0f6d5 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -23109,7 +23109,7 @@ AT_CHECK([
     for hv in 1 2; do
         grep table=15 hv${hv}flows | \
         grep "priority=100" | \
-        grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
+        grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
 
         grep table=22 hv${hv}flows | \
         grep "priority=200" | \
diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
index c8fa6f03f..18a3c8667 100644
--- a/tests/system-common-macros.at
+++ b/tests/system-common-macros.at
@@ -330,3 +330,7 @@ m4_define([OVS_CHECK_IPROUTE_ENCAP],
 # OVS_CHECK_CT_CLEAR()
 m4_define([OVS_CHECK_CT_CLEAR],
     [AT_SKIP_IF([! grep -q "Datapath supports ct_clear action" ovs-vswitchd.log])])
+
+# OVS_CHECK_CT_ALL_ZERO_NAT()
+m4_define([OVS_CHECK_ALL_ZERO_NAT],
+    [AT_SKIP_IF([! grep -q "Datapath supports ct_all_zero_nat action" ovs-vswitchd.log])]))
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index adc087b8d..930b7124c 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -5297,6 +5297,76 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn -- load-balancer and firewall tuple conflict])
+AT_KEYWORDS([ovnlb])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+OVS_CHECK_ALL_ZERO_NAT()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# 1 logical switch connetected to one logical router.
+# 2 VMs, one used as backend for a load balancer.
+
+check ovn-nbctl                                                  \
+    -- lr-add rtr                                                \
+    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.1/24        \
+    -- ls-add ls                                                 \
+    -- lsp-add ls ls-rtr                                         \
+    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00                \
+    -- lsp-set-type ls-rtr router                                \
+    -- lsp-set-options ls-rtr router-port=rtr-ls                 \
+    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
+    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
+    -- lb-add lb-test 66.66.66.66:666 42.42.42.2:4242 tcp        \
+    -- ls-lb-add ls lb-test
+
+ADD_NAMESPACES(vm1)
+ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", \
+         "42.42.42.1")
+
+ADD_NAMESPACES(vm2)
+ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", \
+         "42.42.42.1")
+
+# Wait for ovn-controller to catch up.
+wait_for_ports_up
+ovn-nbctl --wait=hv sync
+
+# Start IPv4 TCP server on vm1.
+NETNS_DAEMONIZE([vm1], [nc -k -l 42.42.42.2 4242], [nc-vm1.pid])
+
+# Make sure connecting to the VIP works.
+NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z])
+
+# Start IPv4 TCP connection to VIP from vm2.
+NETNS_DAEMONIZE([vm2], [nc 66.66.66.66 666 -p 2001], [nc1-vm2.pid])
+
+# Check conntrack.
+OVS_WAIT_UNTIL([ovs-appctl dpctl/dump-conntrack | grep ESTABLISHED | FORMAT_CT(42.42.42.2)])
+
+# Start IPv4 TCP connection to backend IP from vm2 which would require
+# additional source port translation to avoid a tuple conflict.
+NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z])
+
+AT_CLEANUP
+])
+
 # When a lport is released on a chassis, ovn-controller was
 # not clearing some of the flowss in the table 33 leading
 # to packet drops if ct() is hit.



More information about the dev mailing list