[ovs-dev] [PATCH ovn v2 5/5] ovn-controller: Handle DNAT/no-NAT conntrack tuple collisions.

Mark Gray mark.d.gray at redhat.com
Fri Jun 11 08:15:32 UTC 2021


On 10/06/2021 20:37, Dumitru Ceara wrote:
> On 6/10/21 6:26 PM, Mark Gray wrote:
>> On 09/06/2021 13:12, Dumitru Ceara wrote:
>>> Assuming a load balancer, LB1, with:
>>> - VIP: 42.42.42.42:4242
>>> - backend: 42.42.42.1:2121
>>>
>>> A client might connect to the backend either directly or through the
>>> VIP.  If the first connection is via the VIP and the second connection
>>> is direct but happens to use the same source L4 port, OVN should make
>>> sure that the second connection is SNATed (source port translation) in
>>> order to avoid a tuple collision at commit time.
>>>
>>> For example:
>>> 1. Session via the VIP:
>>>    - original packet: src=42.42.42.2:2000, dst=42.42.42.42:4242
>>>    - after DNAT:      src=42.42.42.2:2000, dst=42.42.42.1:2121
>>> 2. Session directly connected to the backend:
>>>    - original packet: src=42.42.42.2:2000, dst=42.42.42.1:2121
>>>    - in acl stage committing this connection would fail.
>>>
>>> In order to avoid this we now use the all-zero-ip NAT OVS feature when
>>> committing conneections in the ACL stage.  This translates to a no-op
>>> SNAT when there's no tuple collision, and performs source port
>>> translation when a tuple collision would happen.
>>>
>>> We program flows to perform all-zero-ip NAT conditionally, only if the
>>> datapath being used supports it.
>>>
>>> Reported-at: https://bugzilla.redhat.com/1939676
>>> Signed-off-by: Dumitru Ceara <dceara at redhat.com>
>>> ---
>>>  include/ovn/actions.h         |    1 
>>>  lib/actions.c                 |   31 ++++++++
>>>  tests/ovn.at                  |    2 -
>>>  tests/system-common-macros.at |    4 +
>>>  tests/system-ovn.at           |  156 +++++++++++++++++++++++++++++++++++++++++
>>>  5 files changed, 193 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>>> index 040213177..f5eb01eb7 100644
>>> --- a/include/ovn/actions.h
>>> +++ b/include/ovn/actions.h
>>> @@ -25,6 +25,7 @@
>>>  #include "openvswitch/hmap.h"
>>>  #include "openvswitch/uuid.h"
>>>  #include "util.h"
>>> +#include "ovn/features.h"
>>>  
>>>  struct expr;
>>>  struct lexer;
>>> diff --git a/lib/actions.c b/lib/actions.c
>>> index b3433f49e..7010fab2b 100644
>>> --- a/lib/actions.c
>>> +++ b/lib/actions.c
>>> @@ -742,6 +742,22 @@ encode_CT_COMMIT_V1(const struct ovnact_ct_commit_v1 *cc,
>>>      ct->zone_src.ofs = 0;
>>>      ct->zone_src.n_bits = 16;
>>>  
>>> +    /* If the datapath supports all-zero SNAT then use it to avoid tuple
>>> +     * collisions at commit time between NATed and firewalled-only sessions.
>>> +     */
>>> +
>>> +    if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) {
>>> +        size_t nat_offset = ofpacts->size;
>>> +        ofpbuf_pull(ofpacts, nat_offset);
>>> +
>>> +        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
>>> +        nat->flags = 0;
>>> +        nat->range_af = AF_UNSPEC;
>>> +        nat->flags |= NX_NAT_F_SRC;
>>> +        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
>>> +        ct = ofpacts->header;
>>> +    }
>>> +
>>>      size_t set_field_offset = ofpacts->size;
>>>      ofpbuf_pull(ofpacts, set_field_offset);
>>>  
>>> @@ -792,6 +808,21 @@ encode_CT_COMMIT_V2(const struct ovnact_nest *on,
>>>      ct->zone_src.ofs = 0;
>>>      ct->zone_src.n_bits = 16;
>>>  
>>> +    /* If the datapath supports all-zero SNAT then use it to avoid tuple
>>> +     * collisions at commit time between NATed and firewalled-only sessions.
>>> +     */
>>> +    if (ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)) {
>>> +        size_t nat_offset = ofpacts->size;
>>> +        ofpbuf_pull(ofpacts, nat_offset);
>>> +
>>> +        struct ofpact_nat *nat = ofpact_put_NAT(ofpacts);
>>> +        nat->flags = 0;
>>> +        nat->range_af = AF_UNSPEC;
>>> +        nat->flags |= NX_NAT_F_SRC;
>>> +        ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
>>> +        ct = ofpacts->header;
>>> +    }
>>> +
>>>      size_t set_field_offset = ofpacts->size;
>>>      ofpbuf_pull(ofpacts, set_field_offset);
>>>  
>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>> index f26894ce4..638aa5010 100644
>>> --- a/tests/ovn.at
>>> +++ b/tests/ovn.at
>>> @@ -23109,7 +23109,7 @@ AT_CHECK([
>>>      for hv in 1 2; do
>>>          grep table=15 hv${hv}flows | \
>>>          grep "priority=100" | \
>>> -        grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
>>> +        grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[80..95\\]]))"
>>>  
>>>          grep table=22 hv${hv}flows | \
>>>          grep "priority=200" | \
>>> diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
>>> index c8fa6f03f..b742a2cb9 100644
>>> --- a/tests/system-common-macros.at
>>> +++ b/tests/system-common-macros.at
>>> @@ -330,3 +330,7 @@ m4_define([OVS_CHECK_IPROUTE_ENCAP],
>>>  # OVS_CHECK_CT_CLEAR()
>>>  m4_define([OVS_CHECK_CT_CLEAR],
>>>      [AT_SKIP_IF([! grep -q "Datapath supports ct_clear action" ovs-vswitchd.log])])
>>> +
>>> +# OVS_CHECK_CT_ZERO_SNAT()
>>> +m4_define([OVS_CHECK_CT_ZERO_SNAT],
>>> +    [AT_SKIP_IF([! grep -q "Datapath supports ct_zero_snat" ovs-vswitchd.log])]))
>>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
>>> index 310bd3d5a..1c11f9bab 100644
>>> --- a/tests/system-ovn.at
>>> +++ b/tests/system-ovn.at
>>> @@ -5296,6 +5296,162 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
>>>  AT_CLEANUP
>>>  ])
>>>  
>>> +OVN_FOR_EACH_NORTHD([
>>> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv4])
>>> +AT_SKIP_IF([test $HAVE_NC = no])
>>> +AT_KEYWORDS([ovnlb])
>>> +
>>> +CHECK_CONNTRACK()
>>> +CHECK_CONNTRACK_NAT()
>>> +ovn_start
>>> +OVS_TRAFFIC_VSWITCHD_START()
>>> +OVS_CHECK_CT_ZERO_SNAT()
>>> +ADD_BR([br-int])
>>> +
>>> +# Set external-ids in br-int needed for ovn-controller
>>> +ovs-vsctl \
>>> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
>>> +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>>> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>>> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>>> +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
>>> +
>>> +# Start ovn-controller
>>> +start_daemon ovn-controller
>>> +
>>> +# Logical network:
>>> +# 1 logical switch connetected to one logical router.
>>> +# 2 VMs, one used as backend for a load balancer.
>>> +
>>> +check ovn-nbctl                                                  \
>>> +    -- lr-add rtr                                                \
>>> +    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 42.42.42.1/24        \
>>> +    -- ls-add ls                                                 \
>>> +    -- lsp-add ls ls-rtr                                         \
>>> +    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00                \
>>> +    -- lsp-set-type ls-rtr router                                \
>>> +    -- lsp-set-options ls-rtr router-port=rtr-ls                 \
>>> +    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
>>> +    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
>>> +    -- lb-add lb-test 66.66.66.66:666 42.42.42.2:4242 tcp        \
>>> +    -- ls-lb-add ls lb-test
>>> +
>>> +ADD_NAMESPACES(vm1)
>>> +ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", "42.42.42.1")
>>> +
>>> +ADD_NAMESPACES(vm2)
>>> +ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", "42.42.42.1")
>>> +
>>> +# Wait for ovn-controller to catch up.
>>> +wait_for_ports_up
>>> +check ovn-nbctl --wait=hv sync
>>> +
>>> +# Start IPv4 TCP server on vm1.
>>> +NETNS_DAEMONIZE([vm1], [nc -k -l 42.42.42.2 4242], [nc-vm1.pid])
>>> +
>>> +# Make sure connecting to the VIP works.
>>> +NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -p 2000 -z])
>>> +
>>> +# Start IPv4 TCP connection to VIP from vm2.
>>> +NETNS_DAEMONIZE([vm2], [nc 66.66.66.66 666 -p 2001], [nc1-vm2.pid])
>>> +
>>> +# Check conntrack.
>>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-conntrack | \
>>> +FORMAT_CT(42.42.42.2) | wc -l` = 1])
>>> +
>>> +# Start IPv4 TCP connection to backend IP from vm2 which would require
>>> +# additional source port translation to avoid a tuple conflict.
>>> +NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z])
>>> +
>>> +# Check conntrack.
>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(42.42.42.2) | \
>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
>>> +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
>>> +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
>>> +])
>>> +
>>> +AT_CLEANUP
>>> +])
>>> +
>>> +OVN_FOR_EACH_NORTHD([
>>> +AT_SETUP([ovn -- load-balancer and firewall tuple conflict IPv6])
>>> +AT_SKIP_IF([test $HAVE_NC = no])
>>> +AT_KEYWORDS([ovnlb])
>>> +
>>> +CHECK_CONNTRACK()
>>> +CHECK_CONNTRACK_NAT()
>>> +ovn_start
>>> +OVS_TRAFFIC_VSWITCHD_START()
>>> +OVS_CHECK_CT_ZERO_SNAT()
>>> +ADD_BR([br-int])
>>> +
>>> +# Set external-ids in br-int needed for ovn-controller
>>> +ovs-vsctl \
>>> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
>>> +        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
>>> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
>>> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
>>> +        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
>>> +
>>> +# Start ovn-controller
>>> +start_daemon ovn-controller
>>> +
>>> +# Logical network:
>>> +# 1 logical switch connetected to one logical router.
>>> +# 2 VMs, one used as backend for a load balancer.
>>> +
>>> +check ovn-nbctl                                                  \
>>> +    -- lr-add rtr                                                \
>>> +    -- lrp-add rtr rtr-ls 00:00:00:00:01:00 4242::1/64           \
>>> +    -- ls-add ls                                                 \
>>> +    -- lsp-add ls ls-rtr                                         \
>>> +    -- lsp-set-addresses ls-rtr 00:00:00:00:01:00                \
>>> +    -- lsp-set-type ls-rtr router                                \
>>> +    -- lsp-set-options ls-rtr router-port=rtr-ls                 \
>>> +    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
>>> +    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02 \
>>> +    -- lb-add lb-test [[6666::1]]:666 [[4242::2]]:4242 tcp       \
>>> +    -- ls-lb-add ls lb-test
>>> +
>>> +ADD_NAMESPACES(vm1)
>>> +ADD_VETH(vm1, vm1, br-int, "4242::2/64", "00:00:00:00:00:01", "4242::1")
>>> +OVS_WAIT_UNTIL([test "$(ip netns exec vm1 ip a | grep 4242::2 | grep tentative)" = ""])
>>> +
>>> +ADD_NAMESPACES(vm2)
>>> +ADD_VETH(vm2, vm2, br-int, "4242::3/64", "00:00:00:00:00:02", "4242::1")
>>> +OVS_WAIT_UNTIL([test "$(ip netns exec vm2 ip a | grep 4242::3 | grep tentative)" = ""])
>>> +
>>> +# Wait for ovn-controller to catch up.
>>> +wait_for_ports_up
>>> +check ovn-nbctl --wait=hv sync
>>> +
>>> +# Start IPv6 TCP server on vm1.
>>> +NETNS_DAEMONIZE([vm1], [nc -k -l 4242::2 4242], [nc-vm1.pid])
>>> +
>>> +# Make sure connecting to the VIP works.
>>> +NS_CHECK_EXEC([vm2], [nc 6666::1 666 -p 2000 -z])
>>> +
>>> +# Start IPv6 TCP connection to VIP from vm2.
>>> +NETNS_DAEMONIZE([vm2], [nc 6666::1 666 -p 2001], [nc1-vm2.pid])
>>> +
>>> +# Check conntrack.
>>> +OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-conntrack | \
>>> +FORMAT_CT(4242::2) | wc -l` = 1])
>>> +
>>> +# Start IPv6 TCP connection to backend IP from vm2 which would require
>>> +# additional source port translation to avoid a tuple conflict.
>>> +NS_CHECK_EXEC([vm2], [nc 4242::2 4242 -p 2001 -z])
>>> +
>>> +# Check conntrack.
>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(4242::2) | \
>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
>>> +tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
>>> +tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
>>
>> FORMAT_CT() obfuscates the ports. Is there anyway to confirm that they
>> are being translated as expected?
>>
> 
> Thanks for the review!  Would the following incremental work for you?
> 

This looks fine. Could you add a comment above the check specifying what
it is checking for and that the two entries are in different zones? Thanks

> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 1c11f9bab..849a11d1b 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -5364,10 +5364,14 @@ FORMAT_CT(42.42.42.2) | wc -l` = 1])
>  NS_CHECK_EXEC([vm2], [nc 42.42.42.2 4242 -p 2001 -z])
>  
>  # Check conntrack.
> -AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(42.42.42.2) | \
> -sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> -tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
> -tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<cleared>,dport=<cleared>),reply=(src=42.42.42.2,dst=42.42.42.3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 |             \
> +grep "orig=.src=42\.42\.42\.3,dst=42\.42\.42\.2," |                 \
> +sed -e 's/port=2001/port=<clnt_s_port>/g'                           \
> +    -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \
> +    -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g'                     \
> +    -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl
> +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>)
> +tcp,orig=(src=42.42.42.3,dst=42.42.42.2,sport=<clnt_s_port>,dport=4242),reply=(src=42.42.42.2,dst=42.42.42.3,sport=4242,dport=<rnd_port>),zone=<cleared>,protoinfo=(state=<cleared>)
>  ])
>  
>  AT_CLEANUP
> @@ -5443,10 +5447,14 @@ FORMAT_CT(4242::2) | wc -l` = 1])
>  NS_CHECK_EXEC([vm2], [nc 4242::2 4242 -p 2001 -z])
>  
>  # Check conntrack.
> -AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(4242::2) | \
> -sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> -tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
> -tcp,orig=(src=4242::3,dst=4242::2,sport=<cleared>,dport=<cleared>),reply=(src=4242::2,dst=4242::3,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>)
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 2001 |             \
> +grep "orig=.src=4242::3,dst=4242::2," |                             \
> +sed -e 's/port=2001/port=<clnt_s_port>/g'                           \
> +    -e 's/sport=4242,dport=[[0-9]]\+/sport=4242,dport=<rnd_port>/g' \
> +    -e 's/state=[[0-9_A-Z]]*/state=<cleared>/g'                     \
> +    -e 's/zone=[[0-9]]*/zone=<cleared>/' | sort], [0], [dnl
> +tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<clnt_s_port>),zone=<cleared>,protoinfo=(state=<cleared>)
> +tcp,orig=(src=4242::3,dst=4242::2,sport=<clnt_s_port>,dport=4242),reply=(src=4242::2,dst=4242::3,sport=4242,dport=<rnd_port>),zone=<cleared>,protoinfo=(state=<cleared>)
>  ])
>  
>  AT_CLEANUP
> 



More information about the dev mailing list